/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.util;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.zip.Checksum;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.ChecksumException;

/**
 * This class provides interface and utilities for processing checksums for
 * DFS data transfers.
 *
 * An instance wraps an underlying {@link Checksum} implementation together
 * with a checksum {@link Type} and a chunk size (bytesPerChecksum). The
 * serialized header is {@link #HEADER_LEN} bytes: one type byte followed by
 * the chunk size as a big-endian int.
 */
@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
@InterfaceStability.Evolving
public class DataChecksum implements Checksum {

  // Misc constants
  public static final int HEADER_LEN = 5; /// 1 byte type and 4 byte len

  // checksum types
  public static final int CHECKSUM_NULL    = 0;
  public static final int CHECKSUM_CRC32   = 1;
  public static final int CHECKSUM_CRC32C  = 2;
  public static final int CHECKSUM_DEFAULT = 3;
  public static final int CHECKSUM_MIXED   = 4;

  /** The checksum types */
  public static enum Type {
    NULL  (CHECKSUM_NULL, 0),
    CRC32 (CHECKSUM_CRC32, 4),
    CRC32C(CHECKSUM_CRC32C, 4),
    DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum
    MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum

    /** Numeric id written as the first byte of the checksum header. */
    public final int id;
    /** Number of bytes one checksum value of this type occupies. */
    public final int size;

    private Type(int id, int size) {
      this.id = id;
      this.size = size;
    }

    /**
     * Looks a type up by its numeric id. The lookup indexes the declaration
     * order, so the constants above must stay declared in id order.
     *
     * @param id the numeric checksum type id
     * @return the type corresponding to the id.
     * @throws IllegalArgumentException if the id is outside
     *         [0, number of types)
     */
    public static Type valueOf(int id) {
      // values() clones the backing array on every call; fetch it once.
      final Type[] types = values();
      if (id < 0 || id >= types.length) {
        throw new IllegalArgumentException("id=" + id
            + " out of range [0, " + types.length + ")");
      }
      return types[id];
    }
  }


  /**
   * Creates a DataChecksum of the given type and chunk size.
   *
   * @param type the checksum type; only NULL, CRC32 and CRC32C can be
   *             instantiated
   * @param bytesPerChecksum number of data bytes covered by one checksum
   * @return the new DataChecksum, or null if bytesPerChecksum is not
   *         positive or the type cannot be instantiated
   */
  public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) {
    if ( bytesPerChecksum <= 0 ) {
      return null;
    }

    switch ( type ) {
    case NULL :
      return new DataChecksum(type, new ChecksumNull(), bytesPerChecksum );
    case CRC32 :
      return new DataChecksum(type, new PureJavaCrc32(), bytesPerChecksum );
    case CRC32C:
      return new DataChecksum(type, new PureJavaCrc32C(), bytesPerChecksum);
    default:
      return null;
    }
  }

  /**
   * Creates a DataChecksum from HEADER_LEN bytes from bytes[offset].
   *
   * @param bytes array containing the serialized header
   * @param offset index of the header's type byte within the array
   * @return DataChecksum of the type in the array or null in case of an error
   *         (null array, negative offset, fewer than HEADER_LEN bytes
   *         available, or a type/chunk size that cannot be instantiated).
   * @throws IllegalArgumentException if the type byte is not a known type id
   */
  public static DataChecksum newDataChecksum( byte bytes[], int offset ) {
    // Compare via subtraction: "offset + HEADER_LEN" can overflow for a
    // very large offset and wrongly pass the check.
    if ( bytes == null || offset < 0 || bytes.length - offset < HEADER_LEN ) {
      return null;
    }

    // like readInt():
    int bytesPerChecksum = readBigEndianInt(bytes, offset + 1);
    // The type byte lives at the start of the header, i.e. at 'offset',
    // not at index 0 of the array.
    return newDataChecksum( Type.valueOf(bytes[offset]), bytesPerChecksum );
  }

  /**
   * This constructs a DataChecksum by reading HEADER_LEN bytes from
   * input stream <i>in</i>
   *
   * @param in stream positioned at the start of a checksum header
   * @return the DataChecksum described by the header, never null
   * @throws IOException if reading fails or no DataChecksum can be created
   *         for the type and chunk size that were read
   * @throws IllegalArgumentException if the type byte is not a known type id
   */
  public static DataChecksum newDataChecksum( DataInputStream in )
                                 throws IOException {
    int type = in.readByte();
    int bpc = in.readInt();
    DataChecksum summer = newDataChecksum(Type.valueOf(type), bpc );
    if ( summer == null ) {
      throw new IOException( "Could not create DataChecksum of type " +
                             type + " with bytesPerChecksum " + bpc );
    }
    return summer;
  }

  /**
   * Writes the checksum header to the output stream <i>out</i>.
   *
   * @param out stream receiving the type byte followed by the chunk size
   * @throws IOException if the write fails
   */
  public void writeHeader( DataOutputStream out )
                           throws IOException {
    out.writeByte( type.id );
    out.writeInt( bytesPerChecksum );
  }

  /**
   * Builds the checksum header as a byte array.
   *
   * @return a new HEADER_LEN-byte array holding the type byte followed by
   *         the chunk size in big-endian order
   */
  public byte[] getHeader() {
    byte[] header = new byte[DataChecksum.HEADER_LEN];
    header[0] = (byte) (type.id & 0xff);
    // Writing in buffer just like DataOutput.WriteInt()
    writeBigEndianInt(header, 1, bytesPerChecksum);
    return header;
  }

  /**
   * Writes the current checksum to the stream.
   * If <i>reset</i> is true, then resets the checksum.
   *
   * @param out stream receiving the checksum value
   * @param reset whether to reset this checksum after writing
   * @return number of bytes written. Will be equal to getChecksumSize();
   * @throws IOException if the write fails or the checksum size is neither
   *         zero nor four bytes
   */
  public int writeValue( DataOutputStream out, boolean reset )
                         throws IOException {
    if ( type.size <= 0 ) {
      return 0;
    }

    if ( type.size == 4 ) {
      out.writeInt( (int) summer.getValue() );
    } else {
      throw new IOException( "Unknown Checksum " + type );
    }

    if ( reset ) {
      reset();
    }

    return type.size;
  }

  /**
   * Writes the current checksum to a buffer.
   * If <i>reset</i> is true, then resets the checksum.
   *
   * @param buf destination array
   * @param offset index in buf at which the checksum is written
   * @param reset whether to reset this checksum after writing
   * @return number of bytes written. Will be equal to getChecksumSize();
   * @throws IOException if the checksum size is neither zero nor four bytes
   */
  public int writeValue( byte[] buf, int offset, boolean reset )
                         throws IOException {
    if ( type.size <= 0 ) {
      return 0;
    }

    if ( type.size == 4 ) {
      writeBigEndianInt(buf, offset, (int) summer.getValue());
    } else {
      throw new IOException( "Unknown Checksum " + type );
    }

    if ( reset ) {
      reset();
    }

    return type.size;
  }

  /**
   * Compares the checksum located at buf[offset] with the current checksum.
   *
   * @param buf array holding a stored big-endian checksum
   * @param offset index in buf of the stored checksum
   * @return true if the checksum matches and false otherwise. A type with a
   *         zero-length checksum always matches.
   */
  public boolean compare( byte buf[], int offset ) {
    if ( type.size == 4 ) {
      return readBigEndianInt(buf, offset) == (int) summer.getValue();
    }
    return type.size == 0;
  }

  private final Type type;
  private final Checksum summer;
  private final int bytesPerChecksum;
  /** Number of bytes fed through update() since the last reset(). */
  private int inSum = 0;

  private DataChecksum( Type type, Checksum checksum, int chunkSize ) {
    this.type = type;
    summer = checksum;
    bytesPerChecksum = chunkSize;
  }

  // Accessors
  public Type getChecksumType() {
    return type;
  }
  public int getChecksumSize() {
    return type.size;
  }
  public int getBytesPerChecksum() {
    return bytesPerChecksum;
  }
  public int getNumBytesInSum() {
    return inSum;
  }

  public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE;
  static public int getChecksumHeaderSize() {
    return 1 + SIZE_OF_INTEGER; // type byte, bytesPerChecksum int
  }

  //Checksum Interface. Just a wrapper around member summer.
  @Override
  public long getValue() {
    return summer.getValue();
  }
  @Override
  public void reset() {
    summer.reset();
    inSum = 0;
  }
  @Override
  public void update( byte[] b, int off, int len ) {
    if ( len > 0 ) {
      summer.update( b, off, len );
      inSum += len;
    }
  }
  @Override
  public void update( int b ) {
    summer.update( b );
    inSum += 1;
  }

  /**
   * Verify that the given checksums match the given data.
   *
   * The 'mark' of the ByteBuffer parameters may be modified by this function,
   * but the position is maintained.
   *
   * Array-backed buffers are verified directly on their backing arrays;
   * otherwise the native implementation is used when available, and a
   * copying fallback when it is not. The array and fallback paths drive the
   * wrapped checksum directly, so its running value is overwritten while
   * getNumBytesInSum() is left untouched.
   *
   * @param data the buffer holding the data to verify.
   * @param checksums the buffer holding a series of stored checksums
   * @param fileName the name of the file being read, for error-reporting
   * @param basePos the file position to which the start of 'data' corresponds
   * @throws ChecksumException if the checksums do not match
   */
  public void verifyChunkedSums(ByteBuffer data, ByteBuffer checksums,
      String fileName, long basePos)
  throws ChecksumException {
    if (type.size == 0) return;

    if (data.hasArray() && checksums.hasArray()) {
      verifyChunkedSums(
          data.array(), data.arrayOffset() + data.position(), data.remaining(),
          checksums.array(), checksums.arrayOffset() + checksums.position(),
          fileName, basePos);
      return;
    }
    if (NativeCrc32.isAvailable()) {
      NativeCrc32.verifyChunkedSums(bytesPerChecksum, type.id, checksums, data,
          fileName, basePos);
      return;
    }

    int startDataPos = data.position();
    data.mark();
    checksums.mark();
    try {
      byte[] buf = new byte[bytesPerChecksum];
      byte[] sum = new byte[type.size];
      while (data.remaining() > 0) {
        int n = Math.min(data.remaining(), bytesPerChecksum);
        checksums.get(sum);
        data.get(buf, 0, n);
        summer.reset();
        summer.update(buf, 0, n);
        int calculated = (int)summer.getValue();
        int stored = readBigEndianInt(sum, 0);
        if (calculated != stored) {
          // Position of the first byte of the chunk that failed.
          long errPos = basePos + data.position() - startDataPos - n;
          throw new ChecksumException(
              "Checksum error: "+ fileName + " at "+ errPos +
              " exp: " + stored + " got: " + calculated, errPos);
        }
      }
    } finally {
      // Restore both positions whether or not verification succeeded.
      data.reset();
      checksums.reset();
    }
  }

  /**
   * Implementation of chunked verification specifically on byte arrays. This
   * is to avoid the copy when dealing with ByteBuffers that have array backing.
   *
   * @param data array holding the data to verify
   * @param dataOff index of the first data byte
   * @param dataLen number of data bytes to verify
   * @param checksums array holding the stored big-endian checksums
   * @param checksumsOff index of the first stored checksum
   * @param fileName the name of the file being read, for error-reporting
   * @param basePos the file position to which data[dataOff] corresponds
   * @throws ChecksumException if a chunk's checksum does not match
   */
  private void verifyChunkedSums(
      byte[] data, int dataOff, int dataLen,
      byte[] checksums, int checksumsOff, String fileName,
      long basePos) throws ChecksumException {

    int remaining = dataLen;
    int dataPos = 0;
    while (remaining > 0) {
      int n = Math.min(remaining, bytesPerChecksum);

      summer.reset();
      summer.update(data, dataOff + dataPos, n);
      dataPos += n;
      remaining -= n;

      int calculated = (int)summer.getValue();
      int stored = readBigEndianInt(checksums, checksumsOff);
      checksumsOff += 4;
      if (calculated != stored) {
        // dataPos already points past the chunk; step back to its start.
        long errPos = basePos + dataPos - n;
        throw new ChecksumException(
            "Checksum error: "+ fileName + " at "+ errPos +
            " exp: " + stored + " got: " + calculated, errPos);
      }
    }
  }

  /**
   * Calculate checksums for the given data.
   *
   * The 'mark' of the ByteBuffer parameters may be modified by this function,
   * but the position is maintained.
   *
   * Checksums are always stored big-endian, independent of the byte order
   * configured on the checksums buffer, so that they match what
   * verifyChunkedSums and the array-backed path expect.
   *
   * @param data the buffer holding the data to checksum.
   * @param checksums the buffer into which checksums will be
   *                  stored. Enough space must be available in this
   *                  buffer to put the checksums.
   */
  public void calculateChunkedSums(ByteBuffer data, ByteBuffer checksums) {
    if (type.size == 0) return;

    if (data.hasArray() && checksums.hasArray()) {
      calculateChunkedSums(data.array(), data.arrayOffset() + data.position(), data.remaining(),
          checksums.array(), checksums.arrayOffset() + checksums.position());
      return;
    }

    data.mark();
    checksums.mark();
    try {
      byte[] buf = new byte[bytesPerChecksum];
      byte[] sum = new byte[SIZE_OF_INTEGER];
      while (data.remaining() > 0) {
        int n = Math.min(data.remaining(), bytesPerChecksum);
        data.get(buf, 0, n);
        summer.reset();
        summer.update(buf, 0, n);
        // Not putInt(): that would follow the buffer's byte order and could
        // disagree with the big-endian decoding used during verification.
        writeBigEndianInt(sum, 0, (int) summer.getValue());
        checksums.put(sum);
      }
    } finally {
      data.reset();
      checksums.reset();
    }
  }

  /**
   * Implementation of chunked calculation specifically on byte arrays. This
   * is to avoid the copy when dealing with ByteBuffers that have array backing.
   *
   * @param data array holding the data to checksum
   * @param dataOffset index of the first data byte
   * @param dataLength number of data bytes to checksum
   * @param sums array receiving one big-endian checksum per chunk
   * @param sumsOffset index in sums at which the first checksum is written
   */
  private void calculateChunkedSums(
      byte[] data, int dataOffset, int dataLength,
      byte[] sums, int sumsOffset) {

    int remaining = dataLength;
    while (remaining > 0) {
      int n = Math.min(remaining, bytesPerChecksum);
      summer.reset();
      summer.update(data, dataOffset, n);
      dataOffset += n;
      remaining -= n;
      writeBigEndianInt(sums, sumsOffset, (int) summer.getValue());
      sumsOffset += SIZE_OF_INTEGER;
    }
  }

  /** Decodes the four bytes at buf[offset..offset+3] as a big-endian int. */
  private static int readBigEndianInt(byte[] buf, int offset) {
    return ( (buf[offset]     & 0xff) << 24 ) |
           ( (buf[offset + 1] & 0xff) << 16 ) |
           ( (buf[offset + 2] & 0xff) << 8 )  |
           ( (buf[offset + 3] & 0xff) );
  }

  /** Encodes value as four big-endian bytes at buf[offset..offset+3]. */
  private static void writeBigEndianInt(byte[] buf, int offset, int value) {
    buf[offset]     = (byte) (value >>> 24);
    buf[offset + 1] = (byte) (value >>> 16);
    buf[offset + 2] = (byte) (value >>> 8);
    buf[offset + 3] = (byte) value;
  }

  /**
   * Two instances are equal when they have the same checksum type and the
   * same chunk size; the running checksum state is not compared.
   */
  @Override
  public boolean equals(Object other) {
    if (!(other instanceof DataChecksum)) {
      return false;
    }
    DataChecksum o = (DataChecksum)other;
    return o.bytesPerChecksum == this.bytesPerChecksum &&
      o.type == this.type;
  }

  @Override
  public int hashCode() {
    return (this.type.id + 31) * this.bytesPerChecksum;
  }

  @Override
  public String toString() {
    return "DataChecksum(type=" + type +
      ", chunkSize=" + bytesPerChecksum + ")";
  }

  /**
   * This just provides a dummy implementation for Checksum class
   * This is used when there is no checksum available or required for
   * data
   */
  static class ChecksumNull implements Checksum {

    public ChecksumNull() {}

    //Dummy interface
    @Override
    public long getValue() { return 0; }
    @Override
    public void reset() {}
    @Override
    public void update(byte[] b, int off, int len) {}
    @Override
    public void update(int b) {}
  }
}