001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.fs;
020    
021    import java.io.*;
022    import java.util.Arrays;
023    
024    import org.apache.hadoop.classification.InterfaceAudience;
025    import org.apache.hadoop.classification.InterfaceStability;
026    import org.apache.hadoop.conf.Configuration;
027    import org.apache.hadoop.fs.permission.FsPermission;
028    import org.apache.hadoop.util.Progressable;
029    import org.apache.hadoop.util.PureJavaCrc32;
030    
031    /****************************************************************
032     * Abstract Checksumed FileSystem.
033     * It provide a basic implementation of a Checksumed FileSystem,
034     * which creates a checksum file for each raw file.
035     * It generates & verifies checksums at the client side.
036     *
037     *****************************************************************/
038    @InterfaceAudience.Public
039    @InterfaceStability.Stable
040    public abstract class ChecksumFileSystem extends FilterFileSystem {
041      private static final byte[] CHECKSUM_VERSION = new byte[] {'c', 'r', 'c', 0};
042      private int bytesPerChecksum = 512;
043      private boolean verifyChecksum = true;
044      private boolean writeChecksum = true;
045    
046      public static double getApproxChkSumLength(long size) {
047        return ChecksumFSOutputSummer.CHKSUM_AS_FRACTION * size;
048      }
049      
050      public ChecksumFileSystem(FileSystem fs) {
051        super(fs);
052      }
053    
054      @Override
055      public void setConf(Configuration conf) {
056        super.setConf(conf);
057        if (conf != null) {
058          bytesPerChecksum = conf.getInt(LocalFileSystemConfigKeys.LOCAL_FS_BYTES_PER_CHECKSUM_KEY,
059                                         LocalFileSystemConfigKeys.LOCAL_FS_BYTES_PER_CHECKSUM_DEFAULT);
060        }
061      }
062      
063      /**
064       * Set whether to verify checksum.
065       */
066      @Override
067      public void setVerifyChecksum(boolean verifyChecksum) {
068        this.verifyChecksum = verifyChecksum;
069      }
070    
071      @Override
072      public void setWriteChecksum(boolean writeChecksum) {
073        this.writeChecksum = writeChecksum;
074      }
075      
076      /** get the raw file system */
077      @Override
078      public FileSystem getRawFileSystem() {
079        return fs;
080      }
081    
082      /** Return the name of the checksum file associated with a file.*/
083      public Path getChecksumFile(Path file) {
084        return new Path(file.getParent(), "." + file.getName() + ".crc");
085      }
086    
087      /** Return true iff file is a checksum file name.*/
088      public static boolean isChecksumFile(Path file) {
089        String name = file.getName();
090        return name.startsWith(".") && name.endsWith(".crc");
091      }
092    
093      /** Return the length of the checksum file given the size of the 
094       * actual file.
095       **/
096      public long getChecksumFileLength(Path file, long fileSize) {
097        return getChecksumLength(fileSize, getBytesPerSum());
098      }
099    
100      /** Return the bytes Per Checksum */
101      public int getBytesPerSum() {
102        return bytesPerChecksum;
103      }
104    
105      private int getSumBufferSize(int bytesPerSum, int bufferSize) {
106        int defaultBufferSize = getConf().getInt(
107                           LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_KEY,
108                           LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_DEFAULT);
109        int proportionalBufferSize = bufferSize / bytesPerSum;
110        return Math.max(bytesPerSum,
111                        Math.max(proportionalBufferSize, defaultBufferSize));
112      }
113    
114      /*******************************************************
115       * For open()'s FSInputStream
116       * It verifies that data matches checksums.
117       *******************************************************/
118      private static class ChecksumFSInputChecker extends FSInputChecker {
119        private ChecksumFileSystem fs;
120        private FSDataInputStream datas;
121        private FSDataInputStream sums;
122        
123        private static final int HEADER_LENGTH = 8;
124        
125        private int bytesPerSum = 1;
126        
127        public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file)
128          throws IOException {
129          this(fs, file, fs.getConf().getInt(
130                           LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_KEY, 
131                           LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_DEFAULT));
132        }
133        
134        public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file, int bufferSize)
135          throws IOException {
136          super( file, fs.getFileStatus(file).getReplication() );
137          this.datas = fs.getRawFileSystem().open(file, bufferSize);
138          this.fs = fs;
139          Path sumFile = fs.getChecksumFile(file);
140          try {
141            int sumBufferSize = fs.getSumBufferSize(fs.getBytesPerSum(), bufferSize);
142            sums = fs.getRawFileSystem().open(sumFile, sumBufferSize);
143    
144            byte[] version = new byte[CHECKSUM_VERSION.length];
145            sums.readFully(version);
146            if (!Arrays.equals(version, CHECKSUM_VERSION))
147              throw new IOException("Not a checksum file: "+sumFile);
148            this.bytesPerSum = sums.readInt();
149            set(fs.verifyChecksum, new PureJavaCrc32(), bytesPerSum, 4);
150          } catch (FileNotFoundException e) {         // quietly ignore
151            set(fs.verifyChecksum, null, 1, 0);
152          } catch (IOException e) {                   // loudly ignore
153            LOG.warn("Problem opening checksum file: "+ file + 
154                     ".  Ignoring exception: " , e); 
155            set(fs.verifyChecksum, null, 1, 0);
156          }
157        }
158        
159        private long getChecksumFilePos( long dataPos ) {
160          return HEADER_LENGTH + 4*(dataPos/bytesPerSum);
161        }
162        
163        @Override
164        protected long getChunkPosition( long dataPos ) {
165          return dataPos/bytesPerSum*bytesPerSum;
166        }
167        
168        @Override
169        public int available() throws IOException {
170          return datas.available() + super.available();
171        }
172        
173        @Override
174        public int read(long position, byte[] b, int off, int len)
175          throws IOException {
176          // parameter check
177          if ((off | len | (off + len) | (b.length - (off + len))) < 0) {
178            throw new IndexOutOfBoundsException();
179          } else if (len == 0) {
180            return 0;
181          }
182          if( position<0 ) {
183            throw new IllegalArgumentException(
184                "Parameter position can not to be negative");
185          }
186    
187          ChecksumFSInputChecker checker = new ChecksumFSInputChecker(fs, file);
188          checker.seek(position);
189          int nread = checker.read(b, off, len);
190          checker.close();
191          return nread;
192        }
193        
194        @Override
195        public void close() throws IOException {
196          datas.close();
197          if( sums != null ) {
198            sums.close();
199          }
200          set(fs.verifyChecksum, null, 1, 0);
201        }
202        
203    
204        @Override
205        public boolean seekToNewSource(long targetPos) throws IOException {
206          long sumsPos = getChecksumFilePos(targetPos);
207          fs.reportChecksumFailure(file, datas, targetPos, sums, sumsPos);
208          boolean newDataSource = datas.seekToNewSource(targetPos);
209          return sums.seekToNewSource(sumsPos) || newDataSource;
210        }
211    
212        @Override
213        protected int readChunk(long pos, byte[] buf, int offset, int len,
214            byte[] checksum) throws IOException {
215    
216          boolean eof = false;
217          if (needChecksum()) {
218            assert checksum != null; // we have a checksum buffer
219            assert checksum.length % CHECKSUM_SIZE == 0; // it is sane length
220            assert len >= bytesPerSum; // we must read at least one chunk
221    
222            final int checksumsToRead = Math.min(
223              len/bytesPerSum, // number of checksums based on len to read
224              checksum.length / CHECKSUM_SIZE); // size of checksum buffer
225            long checksumPos = getChecksumFilePos(pos); 
226            if(checksumPos != sums.getPos()) {
227              sums.seek(checksumPos);
228            }
229    
230            int sumLenRead = sums.read(checksum, 0, CHECKSUM_SIZE * checksumsToRead);
231            if (sumLenRead >= 0 && sumLenRead % CHECKSUM_SIZE != 0) {
232              throw new ChecksumException(
233                "Checksum file not a length multiple of checksum size " +
234                "in " + file + " at " + pos + " checksumpos: " + checksumPos +
235                " sumLenread: " + sumLenRead,
236                pos);
237            }
238            if (sumLenRead <= 0) { // we're at the end of the file
239              eof = true;
240            } else {
241              // Adjust amount of data to read based on how many checksum chunks we read
242              len = Math.min(len, bytesPerSum * (sumLenRead / CHECKSUM_SIZE));
243            }
244          }
245          if(pos != datas.getPos()) {
246            datas.seek(pos);
247          }
248          int nread = readFully(datas, buf, offset, len);
249          if (eof && nread > 0) {
250            throw new ChecksumException("Checksum error: "+file+" at "+pos, pos);
251          }
252          return nread;
253        }
254      }
255      
256      private static class FSDataBoundedInputStream extends FSDataInputStream {
257        private FileSystem fs;
258        private Path file;
259        private long fileLen = -1L;
260    
261        FSDataBoundedInputStream(FileSystem fs, Path file, InputStream in)
262            throws IOException {
263          super(in);
264          this.fs = fs;
265          this.file = file;
266        }
267        
268        @Override
269        public boolean markSupported() {
270          return false;
271        }
272        
273        /* Return the file length */
274        private long getFileLength() throws IOException {
275          if( fileLen==-1L ) {
276            fileLen = fs.getContentSummary(file).getLength();
277          }
278          return fileLen;
279        }
280        
281        /**
282         * Skips over and discards <code>n</code> bytes of data from the
283         * input stream.
284         *
285         *The <code>skip</code> method skips over some smaller number of bytes
286         * when reaching end of file before <code>n</code> bytes have been skipped.
287         * The actual number of bytes skipped is returned.  If <code>n</code> is
288         * negative, no bytes are skipped.
289         *
290         * @param      n   the number of bytes to be skipped.
291         * @return     the actual number of bytes skipped.
292         * @exception  IOException  if an I/O error occurs.
293         *             ChecksumException if the chunk to skip to is corrupted
294         */
295        @Override
296        public synchronized long skip(long n) throws IOException {
297          long curPos = getPos();
298          long fileLength = getFileLength();
299          if( n+curPos > fileLength ) {
300            n = fileLength - curPos;
301          }
302          return super.skip(n);
303        }
304        
305        /**
306         * Seek to the given position in the stream.
307         * The next read() will be from that position.
308         * 
309         * <p>This method does not allow seek past the end of the file.
310         * This produces IOException.
311         *
312         * @param      pos   the postion to seek to.
313         * @exception  IOException  if an I/O error occurs or seeks after EOF
314         *             ChecksumException if the chunk to seek to is corrupted
315         */
316    
317        @Override
318        public synchronized void seek(long pos) throws IOException {
319          if(pos>getFileLength()) {
320            throw new IOException("Cannot seek after EOF");
321          }
322          super.seek(pos);
323        }
324    
325      }
326    
327      /**
328       * Opens an FSDataInputStream at the indicated Path.
329       * @param f the file name to open
330       * @param bufferSize the size of the buffer to be used.
331       */
332      @Override
333      public FSDataInputStream open(Path f, int bufferSize) throws IOException {
334        FileSystem fs;
335        InputStream in;
336        if (verifyChecksum) {
337          fs = this;
338          in = new ChecksumFSInputChecker(this, f, bufferSize);
339        } else {
340          fs = getRawFileSystem();
341          in = fs.open(f, bufferSize);
342        }
343        return new FSDataBoundedInputStream(fs, f, in);
344      }
345    
346      @Override
347      public FSDataOutputStream append(Path f, int bufferSize,
348          Progressable progress) throws IOException {
349        throw new IOException("Not supported");
350      }
351    
352      /**
353       * Calculated the length of the checksum file in bytes.
354       * @param size the length of the data file in bytes
355       * @param bytesPerSum the number of bytes in a checksum block
356       * @return the number of bytes in the checksum file
357       */
358      public static long getChecksumLength(long size, int bytesPerSum) {
359        //the checksum length is equal to size passed divided by bytesPerSum +
360        //bytes written in the beginning of the checksum file.  
361        return ((size + bytesPerSum - 1) / bytesPerSum) * 4 +
362                 CHECKSUM_VERSION.length + 4;  
363      }
364    
365      /** This class provides an output stream for a checksummed file.
366       * It generates checksums for data. */
367      private static class ChecksumFSOutputSummer extends FSOutputSummer {
368        private FSDataOutputStream datas;    
369        private FSDataOutputStream sums;
370        private static final float CHKSUM_AS_FRACTION = 0.01f;
371        
372        public ChecksumFSOutputSummer(ChecksumFileSystem fs, 
373                              Path file, 
374                              boolean overwrite,
375                              int bufferSize,
376                              short replication,
377                              long blockSize,
378                              Progressable progress)
379          throws IOException {
380          super(new PureJavaCrc32(), fs.getBytesPerSum(), 4);
381          int bytesPerSum = fs.getBytesPerSum();
382          this.datas = fs.getRawFileSystem().create(file, overwrite, bufferSize, 
383                                             replication, blockSize, progress);
384          int sumBufferSize = fs.getSumBufferSize(bytesPerSum, bufferSize);
385          this.sums = fs.getRawFileSystem().create(fs.getChecksumFile(file), true, 
386                                                   sumBufferSize, replication,
387                                                   blockSize);
388          sums.write(CHECKSUM_VERSION, 0, CHECKSUM_VERSION.length);
389          sums.writeInt(bytesPerSum);
390        }
391        
392        @Override
393        public void close() throws IOException {
394          flushBuffer();
395          sums.close();
396          datas.close();
397        }
398        
399        @Override
400        protected void writeChunk(byte[] b, int offset, int len, byte[] checksum)
401        throws IOException {
402          datas.write(b, offset, len);
403          sums.write(checksum);
404        }
405      }
406    
407      @Override
408      public FSDataOutputStream create(Path f, FsPermission permission,
409          boolean overwrite, int bufferSize, short replication, long blockSize,
410          Progressable progress) throws IOException {
411        return create(f, permission, overwrite, true, bufferSize,
412            replication, blockSize, progress);
413      }
414    
415      private FSDataOutputStream create(Path f, FsPermission permission,
416          boolean overwrite, boolean createParent, int bufferSize,
417          short replication, long blockSize,
418          Progressable progress) throws IOException {
419        Path parent = f.getParent();
420        if (parent != null) {
421          if (!createParent && !exists(parent)) {
422            throw new FileNotFoundException("Parent directory doesn't exist: "
423                + parent);
424          } else if (!mkdirs(parent)) {
425            throw new IOException("Mkdirs failed to create " + parent);
426          }
427        }
428        final FSDataOutputStream out;
429        if (writeChecksum) {
430          out = new FSDataOutputStream(
431              new ChecksumFSOutputSummer(this, f, overwrite, bufferSize, replication,
432                  blockSize, progress), null);
433        } else {
434          out = fs.create(f, permission, overwrite, bufferSize, replication,
435              blockSize, progress);
436          // remove the checksum file since we aren't writing one
437          Path checkFile = getChecksumFile(f);
438          if (fs.exists(checkFile)) {
439            fs.delete(checkFile, true);
440          }
441        }
442        if (permission != null) {
443          setPermission(f, permission);
444        }
445        return out;
446      }
447    
448      @Override
449      public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
450          boolean overwrite, int bufferSize, short replication, long blockSize,
451          Progressable progress) throws IOException {
452        return create(f, permission, overwrite, false, bufferSize, replication,
453            blockSize, progress);
454      }
455    
456      /**
457       * Set replication for an existing file.
458       * Implement the abstract <tt>setReplication</tt> of <tt>FileSystem</tt>
459       * @param src file name
460       * @param replication new replication
461       * @throws IOException
462       * @return true if successful;
463       *         false if file does not exist or is a directory
464       */
465      @Override
466      public boolean setReplication(Path src, short replication) throws IOException {
467        boolean value = fs.setReplication(src, replication);
468        if (!value)
469          return false;
470    
471        Path checkFile = getChecksumFile(src);
472        if (exists(checkFile))
473          fs.setReplication(checkFile, replication);
474    
475        return true;
476      }
477    
478      /**
479       * Rename files/dirs
480       */
481      @Override
482      public boolean rename(Path src, Path dst) throws IOException {
483        if (fs.isDirectory(src)) {
484          return fs.rename(src, dst);
485        } else {
486          if (fs.isDirectory(dst)) {
487            dst = new Path(dst, src.getName());
488          }
489    
490          boolean value = fs.rename(src, dst);
491          if (!value)
492            return false;
493    
494          Path srcCheckFile = getChecksumFile(src);
495          Path dstCheckFile = getChecksumFile(dst);
496          if (fs.exists(srcCheckFile)) { //try to rename checksum
497            value = fs.rename(srcCheckFile, dstCheckFile);
498          } else if (fs.exists(dstCheckFile)) {
499            // no src checksum, so remove dst checksum
500            value = fs.delete(dstCheckFile, true); 
501          }
502    
503          return value;
504        }
505      }
506    
507      /**
508       * Implement the delete(Path, boolean) in checksum
509       * file system.
510       */
511      @Override
512      public boolean delete(Path f, boolean recursive) throws IOException{
513        FileStatus fstatus = null;
514        try {
515          fstatus = fs.getFileStatus(f);
516        } catch(FileNotFoundException e) {
517          return false;
518        }
519        if (fstatus.isDirectory()) {
520          //this works since the crcs are in the same
521          //directories and the files. so we just delete
522          //everything in the underlying filesystem
523          return fs.delete(f, recursive);
524        } else {
525          Path checkFile = getChecksumFile(f);
526          if (fs.exists(checkFile)) {
527            fs.delete(checkFile, true);
528          }
529          return fs.delete(f, true);
530        }
531      }
532        
533      final private static PathFilter DEFAULT_FILTER = new PathFilter() {
534        @Override
535        public boolean accept(Path file) {
536          return !isChecksumFile(file);
537        }
538      };
539    
540      /**
541       * List the statuses of the files/directories in the given path if the path is
542       * a directory.
543       * 
544       * @param f
545       *          given path
546       * @return the statuses of the files/directories in the given patch
547       * @throws IOException
548       */
549      @Override
550      public FileStatus[] listStatus(Path f) throws IOException {
551        return fs.listStatus(f, DEFAULT_FILTER);
552      }
553      
554      /**
555       * List the statuses of the files/directories in the given path if the path is
556       * a directory.
557       * 
558       * @param f
559       *          given path
560       * @return the statuses of the files/directories in the given patch
561       * @throws IOException
562       */
563      @Override
564      public RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f)
565      throws IOException {
566        return fs.listLocatedStatus(f, DEFAULT_FILTER);
567      }
568      
569      @Override
570      public boolean mkdirs(Path f) throws IOException {
571        return fs.mkdirs(f);
572      }
573    
574      @Override
575      public void copyFromLocalFile(boolean delSrc, Path src, Path dst)
576        throws IOException {
577        Configuration conf = getConf();
578        FileUtil.copy(getLocal(conf), src, this, dst, delSrc, conf);
579      }
580    
581      /**
582       * The src file is under FS, and the dst is on the local disk.
583       * Copy it from FS control to the local dst name.
584       */
585      @Override
586      public void copyToLocalFile(boolean delSrc, Path src, Path dst)
587        throws IOException {
588        Configuration conf = getConf();
589        FileUtil.copy(this, src, getLocal(conf), dst, delSrc, conf);
590      }
591    
592      /**
593       * The src file is under FS, and the dst is on the local disk.
594       * Copy it from FS control to the local dst name.
595       * If src and dst are directories, the copyCrc parameter
596       * determines whether to copy CRC files.
597       */
598      public void copyToLocalFile(Path src, Path dst, boolean copyCrc)
599        throws IOException {
600        if (!fs.isDirectory(src)) { // source is a file
601          fs.copyToLocalFile(src, dst);
602          FileSystem localFs = getLocal(getConf()).getRawFileSystem();
603          if (localFs.isDirectory(dst)) {
604            dst = new Path(dst, src.getName());
605          }
606          dst = getChecksumFile(dst);
607          if (localFs.exists(dst)) { //remove old local checksum file
608            localFs.delete(dst, true);
609          }
610          Path checksumFile = getChecksumFile(src);
611          if (copyCrc && fs.exists(checksumFile)) { //copy checksum file
612            fs.copyToLocalFile(checksumFile, dst);
613          }
614        } else {
615          FileStatus[] srcs = listStatus(src);
616          for (FileStatus srcFile : srcs) {
617            copyToLocalFile(srcFile.getPath(), 
618                            new Path(dst, srcFile.getPath().getName()), copyCrc);
619          }
620        }
621      }
622    
623      @Override
624      public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
625        throws IOException {
626        return tmpLocalFile;
627      }
628    
629      @Override
630      public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile)
631        throws IOException {
632        moveFromLocalFile(tmpLocalFile, fsOutputFile);
633      }
634    
635      /**
636       * Report a checksum error to the file system.
637       * @param f the file name containing the error
638       * @param in the stream open on the file
639       * @param inPos the position of the beginning of the bad data in the file
640       * @param sums the stream open on the checksum file
641       * @param sumsPos the position of the beginning of the bad data in the checksum file
642       * @return if retry is neccessary
643       */
644      public boolean reportChecksumFailure(Path f, FSDataInputStream in,
645                                           long inPos, FSDataInputStream sums, long sumsPos) {
646        return false;
647      }
648    }