001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.util; 020 021import java.io.DataInputStream; 022import java.io.DataOutputStream; 023import java.io.IOException; 024import java.nio.ByteBuffer; 025import java.util.zip.CRC32; 026import java.util.zip.Checksum; 027 028import org.apache.hadoop.classification.InterfaceAudience; 029import org.apache.hadoop.classification.InterfaceStability; 030import org.apache.hadoop.fs.ChecksumException; 031 032/** 033 * This class provides interface and utilities for processing checksums for 034 * DFS data transfers. 035 */ 036@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) 037@InterfaceStability.Evolving 038public class DataChecksum implements Checksum { 039 040 // checksum types 041 public static final int CHECKSUM_NULL = 0; 042 public static final int CHECKSUM_CRC32 = 1; 043 public static final int CHECKSUM_CRC32C = 2; 044 public static final int CHECKSUM_DEFAULT = 3; 045 public static final int CHECKSUM_MIXED = 4; 046 047 /** The checksum types */ 048 public static enum Type { 049 NULL (CHECKSUM_NULL, 0), 050 CRC32 (CHECKSUM_CRC32, 4), 051 CRC32C(CHECKSUM_CRC32C, 4), 052 DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum 053 MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum 054 055 public final int id; 056 public final int size; 057 058 private Type(int id, int size) { 059 this.id = id; 060 this.size = size; 061 } 062 063 /** @return the type corresponding to the id. */ 064 public static Type valueOf(int id) { 065 if (id < 0 || id >= values().length) { 066 throw new IllegalArgumentException("id=" + id 067 + " out of range [0, " + values().length + ")"); 068 } 069 return values()[id]; 070 } 071 } 072 073 /** 074 * Create a Crc32 Checksum object. The implementation of the Crc32 algorithm 075 * is chosen depending on the platform. 076 */ 077 public static Checksum newCrc32() { 078 return new CRC32(); 079 } 080 081 public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) { 082 if ( bytesPerChecksum <= 0 ) { 083 return null; 084 } 085 086 switch ( type ) { 087 case NULL : 088 return new DataChecksum(type, new ChecksumNull(), bytesPerChecksum ); 089 case CRC32 : 090 return new DataChecksum(type, newCrc32(), bytesPerChecksum ); 091 case CRC32C: 092 return new DataChecksum(type, new PureJavaCrc32C(), bytesPerChecksum); 093 default: 094 return null; 095 } 096 } 097 098 /** 099 * Creates a DataChecksum from HEADER_LEN bytes from arr[offset]. 100 * @return DataChecksum of the type in the array or null in case of an error. 101 */ 102 public static DataChecksum newDataChecksum( byte bytes[], int offset ) { 103 if (offset < 0 || bytes.length < offset + getChecksumHeaderSize()) { 104 return null; 105 } 106 107 // like readInt(): 108 int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) | 109 ( (bytes[offset+2] & 0xff) << 16 ) | 110 ( (bytes[offset+3] & 0xff) << 8 ) | 111 ( (bytes[offset+4] & 0xff) ); 112 return newDataChecksum( Type.valueOf(bytes[offset]), bytesPerChecksum ); 113 } 114 115 /** 116 * This constructs a DataChecksum by reading HEADER_LEN bytes from input 117 * stream <i>in</i> 118 */ 119 public static DataChecksum newDataChecksum( DataInputStream in ) 120 throws IOException { 121 int type = in.readByte(); 122 int bpc = in.readInt(); 123 DataChecksum summer = newDataChecksum(Type.valueOf(type), bpc ); 124 if ( summer == null ) { 125 throw new InvalidChecksumSizeException("Could not create DataChecksum " 126 + "of type " + type + " with bytesPerChecksum " + bpc); 127 } 128 return summer; 129 } 130 131 /** 132 * Writes the checksum header to the output stream <i>out</i>. 133 */ 134 public void writeHeader( DataOutputStream out ) 135 throws IOException { 136 out.writeByte( type.id ); 137 out.writeInt( bytesPerChecksum ); 138 } 139 140 public byte[] getHeader() { 141 byte[] header = new byte[getChecksumHeaderSize()]; 142 header[0] = (byte) (type.id & 0xff); 143 // Writing in buffer just like DataOutput.WriteInt() 144 header[1+0] = (byte) ((bytesPerChecksum >>> 24) & 0xff); 145 header[1+1] = (byte) ((bytesPerChecksum >>> 16) & 0xff); 146 header[1+2] = (byte) ((bytesPerChecksum >>> 8) & 0xff); 147 header[1+3] = (byte) (bytesPerChecksum & 0xff); 148 return header; 149 } 150 151 /** 152 * Writes the current checksum to the stream. 153 * If <i>reset</i> is true, then resets the checksum. 154 * @return number of bytes written. Will be equal to getChecksumSize(); 155 */ 156 public int writeValue( DataOutputStream out, boolean reset ) 157 throws IOException { 158 if ( type.size <= 0 ) { 159 return 0; 160 } 161 162 if ( type.size == 4 ) { 163 out.writeInt( (int) summer.getValue() ); 164 } else { 165 throw new IOException( "Unknown Checksum " + type ); 166 } 167 168 if ( reset ) { 169 reset(); 170 } 171 172 return type.size; 173 } 174 175 /** 176 * Writes the current checksum to a buffer. 177 * If <i>reset</i> is true, then resets the checksum. 178 * @return number of bytes written. Will be equal to getChecksumSize(); 179 */ 180 public int writeValue( byte[] buf, int offset, boolean reset ) 181 throws IOException { 182 if ( type.size <= 0 ) { 183 return 0; 184 } 185 186 if ( type.size == 4 ) { 187 int checksum = (int) summer.getValue(); 188 buf[offset+0] = (byte) ((checksum >>> 24) & 0xff); 189 buf[offset+1] = (byte) ((checksum >>> 16) & 0xff); 190 buf[offset+2] = (byte) ((checksum >>> 8) & 0xff); 191 buf[offset+3] = (byte) (checksum & 0xff); 192 } else { 193 throw new IOException( "Unknown Checksum " + type ); 194 } 195 196 if ( reset ) { 197 reset(); 198 } 199 200 return type.size; 201 } 202 203 /** 204 * Compares the checksum located at buf[offset] with the current checksum. 205 * @return true if the checksum matches and false otherwise. 206 */ 207 public boolean compare( byte buf[], int offset ) { 208 if ( type.size == 4 ) { 209 int checksum = ( (buf[offset+0] & 0xff) << 24 ) | 210 ( (buf[offset+1] & 0xff) << 16 ) | 211 ( (buf[offset+2] & 0xff) << 8 ) | 212 ( (buf[offset+3] & 0xff) ); 213 return checksum == (int) summer.getValue(); 214 } 215 return type.size == 0; 216 } 217 218 private final Type type; 219 private final Checksum summer; 220 private final int bytesPerChecksum; 221 private int inSum = 0; 222 223 private DataChecksum( Type type, Checksum checksum, int chunkSize ) { 224 this.type = type; 225 summer = checksum; 226 bytesPerChecksum = chunkSize; 227 } 228 229 /** @return the checksum algorithm type. */ 230 public Type getChecksumType() { 231 return type; 232 } 233 /** @return the size for a checksum. */ 234 public int getChecksumSize() { 235 return type.size; 236 } 237 /** @return the required checksum size given the data length. */ 238 public int getChecksumSize(int dataSize) { 239 return ((dataSize - 1)/getBytesPerChecksum() + 1) * getChecksumSize(); 240 } 241 public int getBytesPerChecksum() { 242 return bytesPerChecksum; 243 } 244 public int getNumBytesInSum() { 245 return inSum; 246 } 247 248 public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE; 249 static public int getChecksumHeaderSize() { 250 return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int 251 } 252 //Checksum Interface. Just a wrapper around member summer. 253 @Override 254 public long getValue() { 255 return summer.getValue(); 256 } 257 @Override 258 public void reset() { 259 summer.reset(); 260 inSum = 0; 261 } 262 @Override 263 public void update( byte[] b, int off, int len ) { 264 if ( len > 0 ) { 265 summer.update( b, off, len ); 266 inSum += len; 267 } 268 } 269 @Override 270 public void update( int b ) { 271 summer.update( b ); 272 inSum += 1; 273 } 274 275 /** 276 * Verify that the given checksums match the given data. 277 * 278 * The 'mark' of the ByteBuffer parameters may be modified by this function,. 279 * but the position is maintained. 280 * 281 * @param data the DirectByteBuffer pointing to the data to verify. 282 * @param checksums the DirectByteBuffer pointing to a series of stored 283 * checksums 284 * @param fileName the name of the file being read, for error-reporting 285 * @param basePos the file position to which the start of 'data' corresponds 286 * @throws ChecksumException if the checksums do not match 287 */ 288 public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums, 289 String fileName, long basePos) throws ChecksumException { 290 if (type.size == 0) return; 291 292 if (data.hasArray() && checksums.hasArray()) { 293 final int dataOffset = data.arrayOffset() + data.position(); 294 final int crcsOffset = checksums.arrayOffset() + checksums.position(); 295 verifyChunked(type, summer, data.array(), dataOffset, data.remaining(), 296 bytesPerChecksum, checksums.array(), crcsOffset, fileName, basePos); 297 return; 298 } 299 if (NativeCrc32.isAvailable()) { 300 NativeCrc32.verifyChunkedSums(bytesPerChecksum, type.id, checksums, data, 301 fileName, basePos); 302 } else { 303 verifyChunked(type, summer, data, bytesPerChecksum, checksums, fileName, 304 basePos); 305 } 306 } 307 308 static void verifyChunked(final Type type, final Checksum algorithm, 309 final ByteBuffer data, final int bytesPerCrc, final ByteBuffer crcs, 310 final String filename, final long basePos) throws ChecksumException { 311 final byte[] bytes = new byte[bytesPerCrc]; 312 final int dataOffset = data.position(); 313 final int dataLength = data.remaining(); 314 data.mark(); 315 crcs.mark(); 316 317 try { 318 int i = 0; 319 for(final int n = dataLength - bytesPerCrc + 1; i < n; i += bytesPerCrc) { 320 data.get(bytes); 321 algorithm.reset(); 322 algorithm.update(bytes, 0, bytesPerCrc); 323 final int computed = (int)algorithm.getValue(); 324 final int expected = crcs.getInt(); 325 326 if (computed != expected) { 327 long errPos = basePos + data.position() - dataOffset - bytesPerCrc; 328 throwChecksumException(type, algorithm, filename, errPos, expected, 329 computed); 330 } 331 } 332 333 final int remainder = dataLength - i; 334 if (remainder > 0) { 335 data.get(bytes, 0, remainder); 336 algorithm.reset(); 337 algorithm.update(bytes, 0, remainder); 338 final int computed = (int)algorithm.getValue(); 339 final int expected = crcs.getInt(); 340 341 if (computed != expected) { 342 long errPos = basePos + data.position() - dataOffset - remainder; 343 throwChecksumException(type, algorithm, filename, errPos, expected, 344 computed); 345 } 346 } 347 } finally { 348 data.reset(); 349 crcs.reset(); 350 } 351 } 352 353 /** 354 * Implementation of chunked verification specifically on byte arrays. This 355 * is to avoid the copy when dealing with ByteBuffers that have array backing. 356 */ 357 static void verifyChunked(final Type type, final Checksum algorithm, 358 final byte[] data, final int dataOffset, final int dataLength, 359 final int bytesPerCrc, final byte[] crcs, final int crcsOffset, 360 final String filename, final long basePos) throws ChecksumException { 361 final int dataEnd = dataOffset + dataLength; 362 int i = dataOffset; 363 int j = crcsOffset; 364 for(final int n = dataEnd-bytesPerCrc+1; i < n; i += bytesPerCrc, j += 4) { 365 algorithm.reset(); 366 algorithm.update(data, i, bytesPerCrc); 367 final int computed = (int)algorithm.getValue(); 368 final int expected = ((crcs[j] << 24) + ((crcs[j + 1] << 24) >>> 8)) 369 + (((crcs[j + 2] << 24) >>> 16) + ((crcs[j + 3] << 24) >>> 24)); 370 371 if (computed != expected) { 372 final long errPos = basePos + i - dataOffset; 373 throwChecksumException(type, algorithm, filename, errPos, expected, 374 computed); 375 } 376 } 377 final int remainder = dataEnd - i; 378 if (remainder > 0) { 379 algorithm.reset(); 380 algorithm.update(data, i, remainder); 381 final int computed = (int)algorithm.getValue(); 382 final int expected = ((crcs[j] << 24) + ((crcs[j + 1] << 24) >>> 8)) 383 + (((crcs[j + 2] << 24) >>> 16) + ((crcs[j + 3] << 24) >>> 24)); 384 385 if (computed != expected) { 386 final long errPos = basePos + i - dataOffset; 387 throwChecksumException(type, algorithm, filename, errPos, expected, 388 computed); 389 } 390 } 391 } 392 393 private static void throwChecksumException(Type type, Checksum algorithm, 394 String filename, long errPos, int expected, int computed) 395 throws ChecksumException { 396 throw new ChecksumException("Checksum " + type 397 + " not matched for file " + filename + " at position "+ errPos 398 + String.format(": expected=%X but computed=%X", expected, computed) 399 + ", algorithm=" + algorithm.getClass().getSimpleName(), errPos); 400 } 401 402 /** 403 * Calculate checksums for the given data. 404 * 405 * The 'mark' of the ByteBuffer parameters may be modified by this function, 406 * but the position is maintained. 407 * 408 * @param data the DirectByteBuffer pointing to the data to checksum. 409 * @param checksums the DirectByteBuffer into which checksums will be 410 * stored. Enough space must be available in this 411 * buffer to put the checksums. 412 */ 413 public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) { 414 if (type.size == 0) return; 415 416 if (data.hasArray() && checksums.hasArray()) { 417 calculateChunkedSums(data.array(), data.arrayOffset() + data.position(), data.remaining(), 418 checksums.array(), checksums.arrayOffset() + checksums.position()); 419 return; 420 } 421 422 if (NativeCrc32.isAvailable()) { 423 NativeCrc32.calculateChunkedSums(bytesPerChecksum, type.id, 424 checksums, data); 425 return; 426 } 427 428 data.mark(); 429 checksums.mark(); 430 try { 431 byte[] buf = new byte[bytesPerChecksum]; 432 while (data.remaining() > 0) { 433 int n = Math.min(data.remaining(), bytesPerChecksum); 434 data.get(buf, 0, n); 435 summer.reset(); 436 summer.update(buf, 0, n); 437 checksums.putInt((int)summer.getValue()); 438 } 439 } finally { 440 data.reset(); 441 checksums.reset(); 442 } 443 } 444 445 /** 446 * Implementation of chunked calculation specifically on byte arrays. This 447 * is to avoid the copy when dealing with ByteBuffers that have array backing. 448 */ 449 public void calculateChunkedSums( 450 byte[] data, int dataOffset, int dataLength, 451 byte[] sums, int sumsOffset) { 452 if (type.size == 0) return; 453 454 if (NativeCrc32.isAvailable()) { 455 NativeCrc32.calculateChunkedSumsByteArray(bytesPerChecksum, type.id, 456 sums, sumsOffset, data, dataOffset, dataLength); 457 return; 458 } 459 460 int remaining = dataLength; 461 while (remaining > 0) { 462 int n = Math.min(remaining, bytesPerChecksum); 463 summer.reset(); 464 summer.update(data, dataOffset, n); 465 dataOffset += n; 466 remaining -= n; 467 long calculated = summer.getValue(); 468 sums[sumsOffset++] = (byte) (calculated >> 24); 469 sums[sumsOffset++] = (byte) (calculated >> 16); 470 sums[sumsOffset++] = (byte) (calculated >> 8); 471 sums[sumsOffset++] = (byte) (calculated); 472 } 473 } 474 475 @Override 476 public boolean equals(Object other) { 477 if (!(other instanceof DataChecksum)) { 478 return false; 479 } 480 DataChecksum o = (DataChecksum)other; 481 return o.bytesPerChecksum == this.bytesPerChecksum && 482 o.type == this.type; 483 } 484 485 @Override 486 public int hashCode() { 487 return (this.type.id + 31) * this.bytesPerChecksum; 488 } 489 490 @Override 491 public String toString() { 492 return "DataChecksum(type=" + type + 493 ", chunkSize=" + bytesPerChecksum + ")"; 494 } 495 496 /** 497 * This just provides a dummy implimentation for Checksum class 498 * This is used when there is no checksum available or required for 499 * data 500 */ 501 static class ChecksumNull implements Checksum { 502 503 public ChecksumNull() {} 504 505 //Dummy interface 506 @Override 507 public long getValue() { return 0; } 508 @Override 509 public void reset() {} 510 @Override 511 public void update(byte[] b, int off, int len) {} 512 @Override 513 public void update(int b) {} 514 }; 515}