001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.util;
020    
021    import java.io.DataInputStream;
022    import java.io.DataOutputStream;
023    import java.io.IOException;
024    import java.nio.ByteBuffer;
025    import java.util.zip.Checksum;
026    
027    import org.apache.hadoop.classification.InterfaceAudience;
028    import org.apache.hadoop.classification.InterfaceStability;
029    import org.apache.hadoop.fs.ChecksumException;
030    
031    /**
032     * This class provides inteface and utilities for processing checksums for
033     * DFS data transfers.
034     */
035    @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
036    @InterfaceStability.Evolving
037    public class DataChecksum implements Checksum {
038      
039      // Misc constants
040      public static final int HEADER_LEN = 5; /// 1 byte type and 4 byte len
041      
042      // checksum types
043      public static final int CHECKSUM_NULL    = 0;
044      public static final int CHECKSUM_CRC32   = 1;
045      public static final int CHECKSUM_CRC32C  = 2;
046      public static final int CHECKSUM_DEFAULT = 3; 
047      public static final int CHECKSUM_MIXED   = 4;
048    
049      /** The checksum types */
050      public static enum Type {
051        NULL  (CHECKSUM_NULL, 0),
052        CRC32 (CHECKSUM_CRC32, 4),
053        CRC32C(CHECKSUM_CRC32C, 4),
054        DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum
055        MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum
056    
057        public final int id;
058        public final int size;
059        
060        private Type(int id, int size) {
061          this.id = id;
062          this.size = size;
063        }
064    
065        /** @return the type corresponding to the id. */
066        public static Type valueOf(int id) {
067          if (id < 0 || id >= values().length) {
068            throw new IllegalArgumentException("id=" + id
069                + " out of range [0, " + values().length + ")");
070          }
071          return values()[id];
072        }
073      }
074    
075    
076      public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) {
077        if ( bytesPerChecksum <= 0 ) {
078          return null;
079        }
080        
081        switch ( type ) {
082        case NULL :
083          return new DataChecksum(type, new ChecksumNull(), bytesPerChecksum );
084        case CRC32 :
085          return new DataChecksum(type, new PureJavaCrc32(), bytesPerChecksum );
086        case CRC32C:
087          return new DataChecksum(type, new PureJavaCrc32C(), bytesPerChecksum);
088        default:
089          return null;  
090        }
091      }
092      
093      /**
094       * Creates a DataChecksum from HEADER_LEN bytes from arr[offset].
095       * @return DataChecksum of the type in the array or null in case of an error.
096       */
097      public static DataChecksum newDataChecksum( byte bytes[], int offset ) {
098        if ( offset < 0 || bytes.length < offset + HEADER_LEN ) {
099          return null;
100        }
101        
102        // like readInt():
103        int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) | 
104                               ( (bytes[offset+2] & 0xff) << 16 ) |
105                               ( (bytes[offset+3] & 0xff) << 8 )  |
106                               ( (bytes[offset+4] & 0xff) );
107        return newDataChecksum( Type.valueOf(bytes[0]), bytesPerChecksum );
108      }
109      
110      /**
111       * This constructucts a DataChecksum by reading HEADER_LEN bytes from
112       * input stream <i>in</i>
113       */
114      public static DataChecksum newDataChecksum( DataInputStream in )
115                                     throws IOException {
116        int type = in.readByte();
117        int bpc = in.readInt();
118        DataChecksum summer = newDataChecksum(Type.valueOf(type), bpc );
119        if ( summer == null ) {
120          throw new IOException( "Could not create DataChecksum of type " +
121                                 type + " with bytesPerChecksum " + bpc );
122        }
123        return summer;
124      }
125      
126      /**
127       * Writes the checksum header to the output stream <i>out</i>.
128       */
129      public void writeHeader( DataOutputStream out ) 
130                               throws IOException { 
131        out.writeByte( type.id );
132        out.writeInt( bytesPerChecksum );
133      }
134    
135      public byte[] getHeader() {
136        byte[] header = new byte[DataChecksum.HEADER_LEN];
137        header[0] = (byte) (type.id & 0xff);
138        // Writing in buffer just like DataOutput.WriteInt()
139        header[1+0] = (byte) ((bytesPerChecksum >>> 24) & 0xff);
140        header[1+1] = (byte) ((bytesPerChecksum >>> 16) & 0xff);
141        header[1+2] = (byte) ((bytesPerChecksum >>> 8) & 0xff);
142        header[1+3] = (byte) (bytesPerChecksum & 0xff);
143        return header;
144      }
145      
146      /**
147       * Writes the current checksum to the stream.
148       * If <i>reset</i> is true, then resets the checksum.
149       * @return number of bytes written. Will be equal to getChecksumSize();
150       */
151       public int writeValue( DataOutputStream out, boolean reset )
152                              throws IOException {
153         if ( type.size <= 0 ) {
154           return 0;
155         }
156    
157         if ( type.size == 4 ) {
158           out.writeInt( (int) summer.getValue() );
159         } else {
160           throw new IOException( "Unknown Checksum " + type );
161         }
162         
163         if ( reset ) {
164           reset();
165         }
166         
167         return type.size;
168       }
169       
170       /**
171        * Writes the current checksum to a buffer.
172        * If <i>reset</i> is true, then resets the checksum.
173        * @return number of bytes written. Will be equal to getChecksumSize();
174        */
175        public int writeValue( byte[] buf, int offset, boolean reset )
176                               throws IOException {
177          if ( type.size <= 0 ) {
178            return 0;
179          }
180    
181          if ( type.size == 4 ) {
182            int checksum = (int) summer.getValue();
183            buf[offset+0] = (byte) ((checksum >>> 24) & 0xff);
184            buf[offset+1] = (byte) ((checksum >>> 16) & 0xff);
185            buf[offset+2] = (byte) ((checksum >>> 8) & 0xff);
186            buf[offset+3] = (byte) (checksum & 0xff);
187          } else {
188            throw new IOException( "Unknown Checksum " + type );
189          }
190          
191          if ( reset ) {
192            reset();
193          }
194          
195          return type.size;
196        }
197       
198       /**
199        * Compares the checksum located at buf[offset] with the current checksum.
200        * @return true if the checksum matches and false otherwise.
201        */
202       public boolean compare( byte buf[], int offset ) {
203         if ( type.size == 4 ) {
204           int checksum = ( (buf[offset+0] & 0xff) << 24 ) | 
205                          ( (buf[offset+1] & 0xff) << 16 ) |
206                          ( (buf[offset+2] & 0xff) << 8 )  |
207                          ( (buf[offset+3] & 0xff) );
208           return checksum == (int) summer.getValue();
209         }
210         return type.size == 0;
211       }
212       
213      private final Type type;
214      private final Checksum summer;
215      private final int bytesPerChecksum;
216      private int inSum = 0;
217      
218      private DataChecksum( Type type, Checksum checksum, int chunkSize ) {
219        this.type = type;
220        summer = checksum;
221        bytesPerChecksum = chunkSize;
222      }
223      
224      // Accessors
225      public Type getChecksumType() {
226        return type;
227      }
228      public int getChecksumSize() {
229        return type.size;
230      }
231      public int getBytesPerChecksum() {
232        return bytesPerChecksum;
233      }
234      public int getNumBytesInSum() {
235        return inSum;
236      }
237      
238      public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE;
239      static public int getChecksumHeaderSize() {
240        return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int
241      }
242      //Checksum Interface. Just a wrapper around member summer.
243      public long getValue() {
244        return summer.getValue();
245      }
246      public void reset() {
247        summer.reset();
248        inSum = 0;
249      }
250      public void update( byte[] b, int off, int len ) {
251        if ( len > 0 ) {
252          summer.update( b, off, len );
253          inSum += len;
254        }
255      }
256      public void update( int b ) {
257        summer.update( b );
258        inSum += 1;
259      }
260      
261      /**
262       * Verify that the given checksums match the given data.
263       * 
264       * The 'mark' of the ByteBuffer parameters may be modified by this function,.
265       * but the position is maintained.
266       *  
267       * @param data the DirectByteBuffer pointing to the data to verify.
268       * @param checksums the DirectByteBuffer pointing to a series of stored
269       *                  checksums
270       * @param fileName the name of the file being read, for error-reporting
271       * @param basePos the file position to which the start of 'data' corresponds
272       * @throws ChecksumException if the checksums do not match
273       */
274      public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums,
275          String fileName, long basePos)
276      throws ChecksumException {
277        if (type.size == 0) return;
278        
279        if (data.hasArray() && checksums.hasArray()) {
280          verifyChunkedSums(
281              data.array(), data.arrayOffset() + data.position(), data.remaining(),
282              checksums.array(), checksums.arrayOffset() + checksums.position(),
283              fileName, basePos);
284          return;
285        }
286        if (NativeCrc32.isAvailable()) {
287          NativeCrc32.verifyChunkedSums(bytesPerChecksum, type.id, checksums, data,
288              fileName, basePos);
289          return;
290        }
291        
292        int startDataPos = data.position();
293        data.mark();
294        checksums.mark();
295        try {
296          byte[] buf = new byte[bytesPerChecksum];
297          byte[] sum = new byte[type.size];
298          while (data.remaining() > 0) {
299            int n = Math.min(data.remaining(), bytesPerChecksum);
300            checksums.get(sum);
301            data.get(buf, 0, n);
302            summer.reset();
303            summer.update(buf, 0, n);
304            int calculated = (int)summer.getValue();
305            int stored = (sum[0] << 24 & 0xff000000) |
306              (sum[1] << 16 & 0xff0000) |
307              (sum[2] << 8 & 0xff00) |
308              sum[3] & 0xff;
309            if (calculated != stored) {
310              long errPos = basePos + data.position() - startDataPos - n;
311              throw new ChecksumException(
312                  "Checksum error: "+ fileName + " at "+ errPos +
313                  " exp: " + stored + " got: " + calculated, errPos);
314            }
315          }
316        } finally {
317          data.reset();
318          checksums.reset();
319        }
320      }
321      
322      /**
323       * Implementation of chunked verification specifically on byte arrays. This
324       * is to avoid the copy when dealing with ByteBuffers that have array backing.
325       */
326      private void verifyChunkedSums(
327          byte[] data, int dataOff, int dataLen,
328          byte[] checksums, int checksumsOff, String fileName,
329          long basePos) throws ChecksumException {
330        
331        int remaining = dataLen;
332        int dataPos = 0;
333        while (remaining > 0) {
334          int n = Math.min(remaining, bytesPerChecksum);
335          
336          summer.reset();
337          summer.update(data, dataOff + dataPos, n);
338          dataPos += n;
339          remaining -= n;
340          
341          int calculated = (int)summer.getValue();
342          int stored = (checksums[checksumsOff] << 24 & 0xff000000) |
343            (checksums[checksumsOff + 1] << 16 & 0xff0000) |
344            (checksums[checksumsOff + 2] << 8 & 0xff00) |
345            checksums[checksumsOff + 3] & 0xff;
346          checksumsOff += 4;
347          if (calculated != stored) {
348            long errPos = basePos + dataPos - n;
349            throw new ChecksumException(
350                "Checksum error: "+ fileName + " at "+ errPos +
351                " exp: " + stored + " got: " + calculated, errPos);
352          }
353        }
354      }
355    
356      /**
357       * Calculate checksums for the given data.
358       * 
359       * The 'mark' of the ByteBuffer parameters may be modified by this function,
360       * but the position is maintained.
361       * 
362       * @param data the DirectByteBuffer pointing to the data to checksum.
363       * @param checksums the DirectByteBuffer into which checksums will be
364       *                  stored. Enough space must be available in this
365       *                  buffer to put the checksums.
366       */
367      public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) {
368        if (type.size == 0) return;
369        
370        if (data.hasArray() && checksums.hasArray()) {
371          calculateChunkedSums(data.array(), data.arrayOffset() + data.position(), data.remaining(),
372              checksums.array(), checksums.arrayOffset() + checksums.position());
373          return;
374        }
375        
376        data.mark();
377        checksums.mark();
378        try {
379          byte[] buf = new byte[bytesPerChecksum];
380          while (data.remaining() > 0) {
381            int n = Math.min(data.remaining(), bytesPerChecksum);
382            data.get(buf, 0, n);
383            summer.reset();
384            summer.update(buf, 0, n);
385            checksums.putInt((int)summer.getValue());
386          }
387        } finally {
388          data.reset();
389          checksums.reset();
390        }
391      }
392    
393      /**
394       * Implementation of chunked calculation specifically on byte arrays. This
395       * is to avoid the copy when dealing with ByteBuffers that have array backing.
396       */
397      private void calculateChunkedSums(
398          byte[] data, int dataOffset, int dataLength,
399          byte[] sums, int sumsOffset) {
400    
401        int remaining = dataLength;
402        while (remaining > 0) {
403          int n = Math.min(remaining, bytesPerChecksum);
404          summer.reset();
405          summer.update(data, dataOffset, n);
406          dataOffset += n;
407          remaining -= n;
408          long calculated = summer.getValue();
409          sums[sumsOffset++] = (byte) (calculated >> 24);
410          sums[sumsOffset++] = (byte) (calculated >> 16);
411          sums[sumsOffset++] = (byte) (calculated >> 8);
412          sums[sumsOffset++] = (byte) (calculated);
413        }
414      }
415    
416      @Override
417      public boolean equals(Object other) {
418        if (!(other instanceof DataChecksum)) {
419          return false;
420        }
421        DataChecksum o = (DataChecksum)other;
422        return o.bytesPerChecksum == this.bytesPerChecksum &&
423          o.type == this.type;
424      }
425      
426      @Override
427      public int hashCode() {
428        return (this.type.id + 31) * this.bytesPerChecksum;
429      }
430      
431      @Override
432      public String toString() {
433        return "DataChecksum(type=" + type +
434          ", chunkSize=" + bytesPerChecksum + ")";
435      }
436      
437      /**
438       * This just provides a dummy implimentation for Checksum class
439       * This is used when there is no checksum available or required for 
440       * data
441       */
442      static class ChecksumNull implements Checksum {
443        
444        public ChecksumNull() {}
445        
446        //Dummy interface
447        public long getValue() { return 0; }
448        public void reset() {}
449        public void update(byte[] b, int off, int len) {}
450        public void update(int b) {}
451      };
452    }