001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.io; 020 021import java.io.EOFException; 022import java.io.IOException; 023import java.util.ArrayList; 024import java.util.Arrays; 025 026import org.apache.commons.logging.Log; 027import org.apache.commons.logging.LogFactory; 028import org.apache.hadoop.classification.InterfaceAudience; 029import org.apache.hadoop.classification.InterfaceStability; 030import org.apache.hadoop.conf.Configuration; 031import org.apache.hadoop.fs.FileSystem; 032import org.apache.hadoop.fs.Path; 033import org.apache.hadoop.io.SequenceFile.CompressionType; 034import org.apache.hadoop.io.compress.CompressionCodec; 035import org.apache.hadoop.util.Options; 036import org.apache.hadoop.util.Progressable; 037import org.apache.hadoop.util.ReflectionUtils; 038 039/** A file-based map from keys to values. 040 * 041 * <p>A map is a directory containing two files, the <code>data</code> file, 042 * containing all keys and values in the map, and a smaller <code>index</code> 043 * file, containing a fraction of the keys. The fraction is determined by 044 * {@link Writer#getIndexInterval()}. 045 * 046 * <p>The index file is read entirely into memory. Thus key implementations 047 * should try to keep themselves small. 048 * 049 * <p>Map files are created by adding entries in-order. To maintain a large 050 * database, perform updates by copying the previous version of a database and 051 * merging in a sorted change list, to create a new version of the database in 052 * a new file. Sorting large change lists can be done with {@link 053 * SequenceFile.Sorter}. 054 */ 055@InterfaceAudience.Public 056@InterfaceStability.Stable 057public class MapFile { 058 private static final Log LOG = LogFactory.getLog(MapFile.class); 059 060 /** The name of the index file. */ 061 public static final String INDEX_FILE_NAME = "index"; 062 063 /** The name of the data file. */ 064 public static final String DATA_FILE_NAME = "data"; 065 066 protected MapFile() {} // no public ctor 067 068 /** Writes a new map. */ 069 public static class Writer implements java.io.Closeable { 070 private SequenceFile.Writer data; 071 private SequenceFile.Writer index; 072 073 final private static String INDEX_INTERVAL = "io.map.index.interval"; 074 private int indexInterval = 128; 075 076 private long size; 077 private LongWritable position = new LongWritable(); 078 079 // the following fields are used only for checking key order 080 private WritableComparator comparator; 081 private DataInputBuffer inBuf = new DataInputBuffer(); 082 private DataOutputBuffer outBuf = new DataOutputBuffer(); 083 private WritableComparable lastKey; 084 085 /** What's the position (in bytes) we wrote when we got the last index */ 086 private long lastIndexPos = -1; 087 088 /** 089 * What was size when we last wrote an index. Set to MIN_VALUE to ensure that 090 * we have an index at position zero -- midKey will throw an exception if this 091 * is not the case 092 */ 093 private long lastIndexKeyCount = Long.MIN_VALUE; 094 095 096 /** Create the named map for keys of the named class. 097 * @deprecated Use Writer(Configuration, Path, Option...) instead. 098 */ 099 @Deprecated 100 public Writer(Configuration conf, FileSystem fs, String dirName, 101 Class<? extends WritableComparable> keyClass, 102 Class valClass) throws IOException { 103 this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass)); 104 } 105 106 /** Create the named map for keys of the named class. 107 * @deprecated Use Writer(Configuration, Path, Option...) instead. 108 */ 109 @Deprecated 110 public Writer(Configuration conf, FileSystem fs, String dirName, 111 Class<? extends WritableComparable> keyClass, Class valClass, 112 CompressionType compress, 113 Progressable progress) throws IOException { 114 this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass), 115 compression(compress), progressable(progress)); 116 } 117 118 /** Create the named map for keys of the named class. 119 * @deprecated Use Writer(Configuration, Path, Option...) instead. 120 */ 121 @Deprecated 122 public Writer(Configuration conf, FileSystem fs, String dirName, 123 Class<? extends WritableComparable> keyClass, Class valClass, 124 CompressionType compress, CompressionCodec codec, 125 Progressable progress) throws IOException { 126 this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass), 127 compression(compress, codec), progressable(progress)); 128 } 129 130 /** Create the named map for keys of the named class. 131 * @deprecated Use Writer(Configuration, Path, Option...) instead. 132 */ 133 @Deprecated 134 public Writer(Configuration conf, FileSystem fs, String dirName, 135 Class<? extends WritableComparable> keyClass, Class valClass, 136 CompressionType compress) throws IOException { 137 this(conf, new Path(dirName), keyClass(keyClass), 138 valueClass(valClass), compression(compress)); 139 } 140 141 /** Create the named map using the named key comparator. 142 * @deprecated Use Writer(Configuration, Path, Option...) instead. 143 */ 144 @Deprecated 145 public Writer(Configuration conf, FileSystem fs, String dirName, 146 WritableComparator comparator, Class valClass 147 ) throws IOException { 148 this(conf, new Path(dirName), comparator(comparator), 149 valueClass(valClass)); 150 } 151 152 /** Create the named map using the named key comparator. 153 * @deprecated Use Writer(Configuration, Path, Option...) instead. 154 */ 155 @Deprecated 156 public Writer(Configuration conf, FileSystem fs, String dirName, 157 WritableComparator comparator, Class valClass, 158 SequenceFile.CompressionType compress) throws IOException { 159 this(conf, new Path(dirName), comparator(comparator), 160 valueClass(valClass), compression(compress)); 161 } 162 163 /** Create the named map using the named key comparator. 164 * @deprecated Use Writer(Configuration, Path, Option...)} instead. 165 */ 166 @Deprecated 167 public Writer(Configuration conf, FileSystem fs, String dirName, 168 WritableComparator comparator, Class valClass, 169 SequenceFile.CompressionType compress, 170 Progressable progress) throws IOException { 171 this(conf, new Path(dirName), comparator(comparator), 172 valueClass(valClass), compression(compress), 173 progressable(progress)); 174 } 175 176 /** Create the named map using the named key comparator. 177 * @deprecated Use Writer(Configuration, Path, Option...) instead. 178 */ 179 @Deprecated 180 public Writer(Configuration conf, FileSystem fs, String dirName, 181 WritableComparator comparator, Class valClass, 182 SequenceFile.CompressionType compress, CompressionCodec codec, 183 Progressable progress) throws IOException { 184 this(conf, new Path(dirName), comparator(comparator), 185 valueClass(valClass), compression(compress, codec), 186 progressable(progress)); 187 } 188 189 // our options are a superset of sequence file writer options 190 public static interface Option extends SequenceFile.Writer.Option { } 191 192 private static class KeyClassOption extends Options.ClassOption 193 implements Option { 194 KeyClassOption(Class<?> value) { 195 super(value); 196 } 197 } 198 199 private static class ComparatorOption implements Option { 200 private final WritableComparator value; 201 ComparatorOption(WritableComparator value) { 202 this.value = value; 203 } 204 WritableComparator getValue() { 205 return value; 206 } 207 } 208 209 public static Option keyClass(Class<? extends WritableComparable> value) { 210 return new KeyClassOption(value); 211 } 212 213 public static Option comparator(WritableComparator value) { 214 return new ComparatorOption(value); 215 } 216 217 public static SequenceFile.Writer.Option valueClass(Class<?> value) { 218 return SequenceFile.Writer.valueClass(value); 219 } 220 221 public static 222 SequenceFile.Writer.Option compression(CompressionType type) { 223 return SequenceFile.Writer.compression(type); 224 } 225 226 public static 227 SequenceFile.Writer.Option compression(CompressionType type, 228 CompressionCodec codec) { 229 return SequenceFile.Writer.compression(type, codec); 230 } 231 232 public static SequenceFile.Writer.Option progressable(Progressable value) { 233 return SequenceFile.Writer.progressable(value); 234 } 235 236 @SuppressWarnings("unchecked") 237 public Writer(Configuration conf, 238 Path dirName, 239 SequenceFile.Writer.Option... opts 240 ) throws IOException { 241 KeyClassOption keyClassOption = 242 Options.getOption(KeyClassOption.class, opts); 243 ComparatorOption comparatorOption = 244 Options.getOption(ComparatorOption.class, opts); 245 if ((keyClassOption == null) == (comparatorOption == null)) { 246 throw new IllegalArgumentException("key class or comparator option " 247 + "must be set"); 248 } 249 this.indexInterval = conf.getInt(INDEX_INTERVAL, this.indexInterval); 250 251 Class<? extends WritableComparable> keyClass; 252 if (keyClassOption == null) { 253 this.comparator = comparatorOption.getValue(); 254 keyClass = comparator.getKeyClass(); 255 } else { 256 keyClass= 257 (Class<? extends WritableComparable>) keyClassOption.getValue(); 258 this.comparator = WritableComparator.get(keyClass); 259 } 260 this.lastKey = comparator.newKey(); 261 FileSystem fs = dirName.getFileSystem(conf); 262 263 if (!fs.mkdirs(dirName)) { 264 throw new IOException("Mkdirs failed to create directory " + dirName); 265 } 266 Path dataFile = new Path(dirName, DATA_FILE_NAME); 267 Path indexFile = new Path(dirName, INDEX_FILE_NAME); 268 269 SequenceFile.Writer.Option[] dataOptions = 270 Options.prependOptions(opts, 271 SequenceFile.Writer.file(dataFile), 272 SequenceFile.Writer.keyClass(keyClass)); 273 this.data = SequenceFile.createWriter(conf, dataOptions); 274 275 SequenceFile.Writer.Option[] indexOptions = 276 Options.prependOptions(opts, SequenceFile.Writer.file(indexFile), 277 SequenceFile.Writer.keyClass(keyClass), 278 SequenceFile.Writer.valueClass(LongWritable.class), 279 SequenceFile.Writer.compression(CompressionType.BLOCK)); 280 this.index = SequenceFile.createWriter(conf, indexOptions); 281 } 282 283 /** The number of entries that are added before an index entry is added.*/ 284 public int getIndexInterval() { return indexInterval; } 285 286 /** Sets the index interval. 287 * @see #getIndexInterval() 288 */ 289 public void setIndexInterval(int interval) { indexInterval = interval; } 290 291 /** Sets the index interval and stores it in conf 292 * @see #getIndexInterval() 293 */ 294 public static void setIndexInterval(Configuration conf, int interval) { 295 conf.setInt(INDEX_INTERVAL, interval); 296 } 297 298 /** Close the map. */ 299 @Override 300 public synchronized void close() throws IOException { 301 data.close(); 302 index.close(); 303 } 304 305 /** Append a key/value pair to the map. The key must be greater or equal 306 * to the previous key added to the map. */ 307 public synchronized void append(WritableComparable key, Writable val) 308 throws IOException { 309 310 checkKey(key); 311 312 long pos = data.getLength(); 313 // Only write an index if we've changed positions. In a block compressed 314 // file, this means we write an entry at the start of each block 315 if (size >= lastIndexKeyCount + indexInterval && pos > lastIndexPos) { 316 position.set(pos); // point to current eof 317 index.append(key, position); 318 lastIndexPos = pos; 319 lastIndexKeyCount = size; 320 } 321 322 data.append(key, val); // append key/value to data 323 size++; 324 } 325 326 private void checkKey(WritableComparable key) throws IOException { 327 // check that keys are well-ordered 328 if (size != 0 && comparator.compare(lastKey, key) > 0) 329 throw new IOException("key out of order: "+key+" after "+lastKey); 330 331 // update lastKey with a copy of key by writing and reading 332 outBuf.reset(); 333 key.write(outBuf); // write new key 334 335 inBuf.reset(outBuf.getData(), outBuf.getLength()); 336 lastKey.readFields(inBuf); // read into lastKey 337 } 338 339 } 340 341 /** Provide access to an existing map. */ 342 public static class Reader implements java.io.Closeable { 343 344 /** Number of index entries to skip between each entry. Zero by default. 345 * Setting this to values larger than zero can facilitate opening large map 346 * files using less memory. */ 347 private int INDEX_SKIP = 0; 348 349 private WritableComparator comparator; 350 351 private WritableComparable nextKey; 352 private long seekPosition = -1; 353 private int seekIndex = -1; 354 private long firstPosition; 355 356 // the data, on disk 357 private SequenceFile.Reader data; 358 private SequenceFile.Reader index; 359 360 // whether the index Reader was closed 361 private boolean indexClosed = false; 362 363 // the index, in memory 364 private int count = -1; 365 private WritableComparable[] keys; 366 private long[] positions; 367 368 /** Returns the class of keys in this file. */ 369 public Class<?> getKeyClass() { return data.getKeyClass(); } 370 371 /** Returns the class of values in this file. */ 372 public Class<?> getValueClass() { return data.getValueClass(); } 373 374 public static interface Option extends SequenceFile.Reader.Option {} 375 376 public static Option comparator(WritableComparator value) { 377 return new ComparatorOption(value); 378 } 379 380 static class ComparatorOption implements Option { 381 private final WritableComparator value; 382 ComparatorOption(WritableComparator value) { 383 this.value = value; 384 } 385 WritableComparator getValue() { 386 return value; 387 } 388 } 389 390 public Reader(Path dir, Configuration conf, 391 SequenceFile.Reader.Option... opts) throws IOException { 392 ComparatorOption comparatorOption = 393 Options.getOption(ComparatorOption.class, opts); 394 WritableComparator comparator = 395 comparatorOption == null ? null : comparatorOption.getValue(); 396 INDEX_SKIP = conf.getInt("io.map.index.skip", 0); 397 open(dir, comparator, conf, opts); 398 } 399 400 /** Construct a map reader for the named map. 401 * @deprecated 402 */ 403 @Deprecated 404 public Reader(FileSystem fs, String dirName, 405 Configuration conf) throws IOException { 406 this(new Path(dirName), conf); 407 } 408 409 /** Construct a map reader for the named map using the named comparator. 410 * @deprecated 411 */ 412 @Deprecated 413 public Reader(FileSystem fs, String dirName, WritableComparator comparator, 414 Configuration conf) throws IOException { 415 this(new Path(dirName), conf, comparator(comparator)); 416 } 417 418 protected synchronized void open(Path dir, 419 WritableComparator comparator, 420 Configuration conf, 421 SequenceFile.Reader.Option... options 422 ) throws IOException { 423 Path dataFile = new Path(dir, DATA_FILE_NAME); 424 Path indexFile = new Path(dir, INDEX_FILE_NAME); 425 426 // open the data 427 this.data = createDataFileReader(dataFile, conf, options); 428 this.firstPosition = data.getPosition(); 429 430 if (comparator == null) 431 this.comparator = 432 WritableComparator.get(data.getKeyClass(). 433 asSubclass(WritableComparable.class)); 434 else 435 this.comparator = comparator; 436 437 // open the index 438 SequenceFile.Reader.Option[] indexOptions = 439 Options.prependOptions(options, SequenceFile.Reader.file(indexFile)); 440 this.index = new SequenceFile.Reader(conf, indexOptions); 441 } 442 443 /** 444 * Override this method to specialize the type of 445 * {@link SequenceFile.Reader} returned. 446 */ 447 protected SequenceFile.Reader 448 createDataFileReader(Path dataFile, Configuration conf, 449 SequenceFile.Reader.Option... options 450 ) throws IOException { 451 SequenceFile.Reader.Option[] newOptions = 452 Options.prependOptions(options, SequenceFile.Reader.file(dataFile)); 453 return new SequenceFile.Reader(conf, newOptions); 454 } 455 456 private void readIndex() throws IOException { 457 // read the index entirely into memory 458 if (this.keys != null) 459 return; 460 this.count = 0; 461 this.positions = new long[1024]; 462 463 try { 464 int skip = INDEX_SKIP; 465 LongWritable position = new LongWritable(); 466 WritableComparable lastKey = null; 467 long lastIndex = -1; 468 ArrayList<WritableComparable> keyBuilder = new ArrayList<WritableComparable>(1024); 469 while (true) { 470 WritableComparable k = comparator.newKey(); 471 472 if (!index.next(k, position)) 473 break; 474 475 // check order to make sure comparator is compatible 476 if (lastKey != null && comparator.compare(lastKey, k) > 0) 477 throw new IOException("key out of order: "+k+" after "+lastKey); 478 lastKey = k; 479 if (skip > 0) { 480 skip--; 481 continue; // skip this entry 482 } else { 483 skip = INDEX_SKIP; // reset skip 484 } 485 486 // don't read an index that is the same as the previous one. Block 487 // compressed map files used to do this (multiple entries would point 488 // at the same block) 489 if (position.get() == lastIndex) 490 continue; 491 492 if (count == positions.length) { 493 positions = Arrays.copyOf(positions, positions.length * 2); 494 } 495 496 keyBuilder.add(k); 497 positions[count] = position.get(); 498 count++; 499 } 500 501 this.keys = keyBuilder.toArray(new WritableComparable[count]); 502 positions = Arrays.copyOf(positions, count); 503 } catch (EOFException e) { 504 LOG.warn("Unexpected EOF reading " + index + 505 " at entry #" + count + ". Ignoring."); 506 } finally { 507 indexClosed = true; 508 index.close(); 509 } 510 } 511 512 /** Re-positions the reader before its first key. */ 513 public synchronized void reset() throws IOException { 514 data.seek(firstPosition); 515 } 516 517 /** Get the key at approximately the middle of the file. Or null if the 518 * file is empty. 519 */ 520 public synchronized WritableComparable midKey() throws IOException { 521 522 readIndex(); 523 if (count == 0) { 524 return null; 525 } 526 527 return keys[(count - 1) / 2]; 528 } 529 530 /** Reads the final key from the file. 531 * 532 * @param key key to read into 533 */ 534 public synchronized void finalKey(WritableComparable key) 535 throws IOException { 536 537 long originalPosition = data.getPosition(); // save position 538 try { 539 readIndex(); // make sure index is valid 540 if (count > 0) { 541 data.seek(positions[count-1]); // skip to last indexed entry 542 } else { 543 reset(); // start at the beginning 544 } 545 while (data.next(key)) {} // scan to eof 546 547 } finally { 548 data.seek(originalPosition); // restore position 549 } 550 } 551 552 /** Positions the reader at the named key, or if none such exists, at the 553 * first entry after the named key. Returns true iff the named key exists 554 * in this map. 555 */ 556 public synchronized boolean seek(WritableComparable key) throws IOException { 557 return seekInternal(key) == 0; 558 } 559 560 /** 561 * Positions the reader at the named key, or if none such exists, at the 562 * first entry after the named key. 563 * 564 * @return 0 - exact match found 565 * < 0 - positioned at next record 566 * 1 - no more records in file 567 */ 568 private synchronized int seekInternal(WritableComparable key) 569 throws IOException { 570 return seekInternal(key, false); 571 } 572 573 /** 574 * Positions the reader at the named key, or if none such exists, at the 575 * key that falls just before or just after dependent on how the 576 * <code>before</code> parameter is set. 577 * 578 * @param before - IF true, and <code>key</code> does not exist, position 579 * file at entry that falls just before <code>key</code>. Otherwise, 580 * position file at record that sorts just after. 581 * @return 0 - exact match found 582 * < 0 - positioned at next record 583 * 1 - no more records in file 584 */ 585 private synchronized int seekInternal(WritableComparable key, 586 final boolean before) 587 throws IOException { 588 readIndex(); // make sure index is read 589 590 if (seekIndex != -1 // seeked before 591 && seekIndex+1 < count 592 && comparator.compare(key, keys[seekIndex+1])<0 // before next indexed 593 && comparator.compare(key, nextKey) 594 >= 0) { // but after last seeked 595 // do nothing 596 } else { 597 seekIndex = binarySearch(key); 598 if (seekIndex < 0) // decode insertion point 599 seekIndex = -seekIndex-2; 600 601 if (seekIndex == -1) // belongs before first entry 602 seekPosition = firstPosition; // use beginning of file 603 else 604 seekPosition = positions[seekIndex]; // else use index 605 } 606 data.seek(seekPosition); 607 608 if (nextKey == null) 609 nextKey = comparator.newKey(); 610 611 // If we're looking for the key before, we need to keep track 612 // of the position we got the current key as well as the position 613 // of the key before it. 614 long prevPosition = -1; 615 long curPosition = seekPosition; 616 617 while (data.next(nextKey)) { 618 int c = comparator.compare(key, nextKey); 619 if (c <= 0) { // at or beyond desired 620 if (before && c != 0) { 621 if (prevPosition == -1) { 622 // We're on the first record of this index block 623 // and we've already passed the search key. Therefore 624 // we must be at the beginning of the file, so seek 625 // to the beginning of this block and return c 626 data.seek(curPosition); 627 } else { 628 // We have a previous record to back up to 629 data.seek(prevPosition); 630 data.next(nextKey); 631 // now that we've rewound, the search key must be greater than this key 632 return 1; 633 } 634 } 635 return c; 636 } 637 if (before) { 638 prevPosition = curPosition; 639 curPosition = data.getPosition(); 640 } 641 } 642 643 return 1; 644 } 645 646 private int binarySearch(WritableComparable key) { 647 int low = 0; 648 int high = count-1; 649 650 while (low <= high) { 651 int mid = (low + high) >>> 1; 652 WritableComparable midVal = keys[mid]; 653 int cmp = comparator.compare(midVal, key); 654 655 if (cmp < 0) 656 low = mid + 1; 657 else if (cmp > 0) 658 high = mid - 1; 659 else 660 return mid; // key found 661 } 662 return -(low + 1); // key not found. 663 } 664 665 /** Read the next key/value pair in the map into <code>key</code> and 666 * <code>val</code>. Returns true if such a pair exists and false when at 667 * the end of the map */ 668 public synchronized boolean next(WritableComparable key, Writable val) 669 throws IOException { 670 return data.next(key, val); 671 } 672 673 /** Return the value for the named key, or null if none exists. */ 674 public synchronized Writable get(WritableComparable key, Writable val) 675 throws IOException { 676 if (seek(key)) { 677 data.getCurrentValue(val); 678 return val; 679 } else 680 return null; 681 } 682 683 /** 684 * Finds the record that is the closest match to the specified key. 685 * Returns <code>key</code> or if it does not exist, at the first entry 686 * after the named key. 687 * 688- * @param key - key that we're trying to find 689- * @param val - data value if key is found 690- * @return - the key that was the closest match or null if eof. 691 */ 692 public synchronized WritableComparable getClosest(WritableComparable key, 693 Writable val) 694 throws IOException { 695 return getClosest(key, val, false); 696 } 697 698 /** 699 * Finds the record that is the closest match to the specified key. 700 * 701 * @param key - key that we're trying to find 702 * @param val - data value if key is found 703 * @param before - IF true, and <code>key</code> does not exist, return 704 * the first entry that falls just before the <code>key</code>. Otherwise, 705 * return the record that sorts just after. 706 * @return - the key that was the closest match or null if eof. 707 */ 708 public synchronized WritableComparable getClosest(WritableComparable key, 709 Writable val, final boolean before) 710 throws IOException { 711 712 int c = seekInternal(key, before); 713 714 // If we didn't get an exact match, and we ended up in the wrong 715 // direction relative to the query key, return null since we 716 // must be at the beginning or end of the file. 717 if ((!before && c > 0) || 718 (before && c < 0)) { 719 return null; 720 } 721 722 data.getCurrentValue(val); 723 return nextKey; 724 } 725 726 /** Close the map. */ 727 @Override 728 public synchronized void close() throws IOException { 729 if (!indexClosed) { 730 index.close(); 731 } 732 data.close(); 733 } 734 735 } 736 737 /** Renames an existing map directory. */ 738 public static void rename(FileSystem fs, String oldName, String newName) 739 throws IOException { 740 Path oldDir = new Path(oldName); 741 Path newDir = new Path(newName); 742 if (!fs.rename(oldDir, newDir)) { 743 throw new IOException("Could not rename " + oldDir + " to " + newDir); 744 } 745 } 746 747 /** Deletes the named map file. */ 748 public static void delete(FileSystem fs, String name) throws IOException { 749 Path dir = new Path(name); 750 Path data = new Path(dir, DATA_FILE_NAME); 751 Path index = new Path(dir, INDEX_FILE_NAME); 752 753 fs.delete(data, true); 754 fs.delete(index, true); 755 fs.delete(dir, true); 756 } 757 758 /** 759 * This method attempts to fix a corrupt MapFile by re-creating its index. 760 * @param fs filesystem 761 * @param dir directory containing the MapFile data and index 762 * @param keyClass key class (has to be a subclass of Writable) 763 * @param valueClass value class (has to be a subclass of Writable) 764 * @param dryrun do not perform any changes, just report what needs to be done 765 * @return number of valid entries in this MapFile, or -1 if no fixing was needed 766 * @throws Exception 767 */ 768 public static long fix(FileSystem fs, Path dir, 769 Class<? extends Writable> keyClass, 770 Class<? extends Writable> valueClass, boolean dryrun, 771 Configuration conf) throws Exception { 772 String dr = (dryrun ? "[DRY RUN ] " : ""); 773 Path data = new Path(dir, DATA_FILE_NAME); 774 Path index = new Path(dir, INDEX_FILE_NAME); 775 int indexInterval = conf.getInt(Writer.INDEX_INTERVAL, 128); 776 if (!fs.exists(data)) { 777 // there's nothing we can do to fix this! 778 throw new Exception(dr + "Missing data file in " + dir + ", impossible to fix this."); 779 } 780 if (fs.exists(index)) { 781 // no fixing needed 782 return -1; 783 } 784 SequenceFile.Reader dataReader = 785 new SequenceFile.Reader(conf, SequenceFile.Reader.file(data)); 786 if (!dataReader.getKeyClass().equals(keyClass)) { 787 throw new Exception(dr + "Wrong key class in " + dir + ", expected" + keyClass.getName() + 788 ", got " + dataReader.getKeyClass().getName()); 789 } 790 if (!dataReader.getValueClass().equals(valueClass)) { 791 throw new Exception(dr + "Wrong value class in " + dir + ", expected" + valueClass.getName() + 792 ", got " + dataReader.getValueClass().getName()); 793 } 794 long cnt = 0L; 795 Writable key = ReflectionUtils.newInstance(keyClass, conf); 796 Writable value = ReflectionUtils.newInstance(valueClass, conf); 797 SequenceFile.Writer indexWriter = null; 798 if (!dryrun) { 799 indexWriter = 800 SequenceFile.createWriter(conf, 801 SequenceFile.Writer.file(index), 802 SequenceFile.Writer.keyClass(keyClass), 803 SequenceFile.Writer.valueClass 804 (LongWritable.class)); 805 } 806 try { 807 long pos = 0L; 808 LongWritable position = new LongWritable(); 809 while(dataReader.next(key, value)) { 810 cnt++; 811 if (cnt % indexInterval == 0) { 812 position.set(pos); 813 if (!dryrun) indexWriter.append(key, position); 814 } 815 pos = dataReader.getPosition(); 816 } 817 } catch(Throwable t) { 818 // truncated data file. swallow it. 819 } 820 dataReader.close(); 821 if (!dryrun) indexWriter.close(); 822 return cnt; 823 } 824 825 826 public static void main(String[] args) throws Exception { 827 String usage = "Usage: MapFile inFile outFile"; 828 829 if (args.length != 2) { 830 System.err.println(usage); 831 System.exit(-1); 832 } 833 834 String in = args[0]; 835 String out = args[1]; 836 837 Configuration conf = new Configuration(); 838 FileSystem fs = FileSystem.getLocal(conf); 839 MapFile.Reader reader = new MapFile.Reader(fs, in, conf); 840 MapFile.Writer writer = 841 new MapFile.Writer(conf, fs, out, 842 reader.getKeyClass().asSubclass(WritableComparable.class), 843 reader.getValueClass()); 844 845 WritableComparable key = 846 ReflectionUtils.newInstance(reader.getKeyClass().asSubclass(WritableComparable.class), conf); 847 Writable value = 848 ReflectionUtils.newInstance(reader.getValueClass().asSubclass(Writable.class), conf); 849 850 while (reader.next(key, value)) // copy all entries 851 writer.append(key, value); 852 853 writer.close(); 854 } 855 856}