001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019 package org.apache.hadoop.io;
020
021 import java.io.EOFException;
022 import java.io.IOException;
023 import java.util.ArrayList;
024 import java.util.Arrays;
025
026 import org.apache.commons.logging.Log;
027 import org.apache.commons.logging.LogFactory;
028 import org.apache.hadoop.classification.InterfaceAudience;
029 import org.apache.hadoop.classification.InterfaceStability;
030 import org.apache.hadoop.conf.Configuration;
031 import org.apache.hadoop.fs.FileSystem;
032 import org.apache.hadoop.fs.Path;
033 import org.apache.hadoop.io.SequenceFile.CompressionType;
034 import org.apache.hadoop.io.compress.CompressionCodec;
035 import org.apache.hadoop.util.Options;
036 import org.apache.hadoop.util.Progressable;
037 import org.apache.hadoop.util.ReflectionUtils;
038
039 /** A file-based map from keys to values.
040 *
041 * <p>A map is a directory containing two files, the <code>data</code> file,
042 * containing all keys and values in the map, and a smaller <code>index</code>
043 * file, containing a fraction of the keys. The fraction is determined by
044 * {@link Writer#getIndexInterval()}.
045 *
046 * <p>The index file is read entirely into memory. Thus key implementations
047 * should try to keep themselves small.
048 *
049 * <p>Map files are created by adding entries in-order. To maintain a large
050 * database, perform updates by copying the previous version of a database and
051 * merging in a sorted change list, to create a new version of the database in
052 * a new file. Sorting large change lists can be done with {@link
053 * SequenceFile.Sorter}.
054 */
055 @InterfaceAudience.Public
056 @InterfaceStability.Stable
057 public class MapFile {
/** Logger shared by the nested reader/writer classes and the static helpers. */
private static final Log LOG = LogFactory.getLog(MapFile.class);

/** The name of the index file inside a map directory. */
public static final String INDEX_FILE_NAME = "index";

/** The name of the data file inside a map directory. */
public static final String DATA_FILE_NAME = "data";

/**
 * Not publicly instantiable: all functionality in this class is exposed
 * through the nested {@link Writer} and {@link Reader} classes and the static
 * helper methods. The constructor is protected, so subclasses may still call
 * it.
 */
protected MapFile() {}                          // no public ctor
067
068 /** Writes a new map. */
069 public static class Writer implements java.io.Closeable {
/** Writer for the data file, which receives every key/value pair. */
private SequenceFile.Writer data;
/** Writer for the index file, which receives a subset of keys with their data-file positions. */
private SequenceFile.Writer index;

/** Configuration key from which the index interval is read. */
final private static String INDEX_INTERVAL = "io.map.index.interval";
/** Minimum number of appended entries between two index entries; 128 unless configured. */
private int indexInterval = 128;

/** Number of entries appended so far. */
private long size;
/** Reusable holder for the data-file position written with each index entry. */
private LongWritable position = new LongWritable();

// the following fields are used only for checking key order
private WritableComparator comparator;
private DataInputBuffer inBuf = new DataInputBuffer();
private DataOutputBuffer outBuf = new DataOutputBuffer();
/** Copy of the most recently appended key, refreshed by checkKey. */
private WritableComparable lastKey;

/** The data-file position (in bytes) recorded by the most recent index entry, or -1 if none yet. */
private long lastIndexPos = -1;

/**
 * What was size when we last wrote an index. Set to MIN_VALUE to ensure that
 * we have an index at position zero -- midKey will throw an exception if this
 * is not the case
 */
private long lastIndexKeyCount = Long.MIN_VALUE;
094
095
/**
 * Create the named map for keys of the named class.
 *
 * @param conf configuration handed to the option-based constructor
 * @param fs unused; this constructor never reads it
 * @param dirName name of the map directory, converted to a {@link Path}
 * @param keyClass class of the keys to be written
 * @param valClass class of the values to be written
 * @throws IOException if the option-based constructor fails
 * @deprecated Use
 *   {@link #Writer(Configuration, Path, SequenceFile.Writer.Option...)}
 *   instead.
 */
@Deprecated
public Writer(Configuration conf, FileSystem fs, String dirName,
              Class<? extends WritableComparable> keyClass,
              Class valClass) throws IOException {
  this(conf,
       new Path(dirName),
       Writer.keyClass(keyClass),
       Writer.valueClass(valClass));
}
105
/**
 * Create the named map for keys of the named class, with a compression type
 * and a progress callback.
 *
 * @param conf configuration handed to the option-based constructor
 * @param fs unused; this constructor never reads it
 * @param dirName name of the map directory, converted to a {@link Path}
 * @param keyClass class of the keys to be written
 * @param valClass class of the values to be written
 * @param compress compression type passed on as a writer option
 * @param progress progress callback passed on as a writer option
 * @throws IOException if the option-based constructor fails
 * @deprecated Use
 *   {@link #Writer(Configuration, Path, SequenceFile.Writer.Option...)}
 *   instead.
 */
@Deprecated
public Writer(Configuration conf, FileSystem fs, String dirName,
              Class<? extends WritableComparable> keyClass, Class valClass,
              CompressionType compress,
              Progressable progress) throws IOException {
  this(conf,
       new Path(dirName),
       Writer.keyClass(keyClass),
       Writer.valueClass(valClass),
       Writer.compression(compress),
       Writer.progressable(progress));
}
117
/**
 * Create the named map for keys of the named class, with a compression type,
 * a codec and a progress callback.
 *
 * @param conf configuration handed to the option-based constructor
 * @param fs unused; this constructor never reads it
 * @param dirName name of the map directory, converted to a {@link Path}
 * @param keyClass class of the keys to be written
 * @param valClass class of the values to be written
 * @param compress compression type passed on as a writer option
 * @param codec compression codec passed on together with {@code compress}
 * @param progress progress callback passed on as a writer option
 * @throws IOException if the option-based constructor fails
 * @deprecated Use
 *   {@link #Writer(Configuration, Path, SequenceFile.Writer.Option...)}
 *   instead.
 */
@Deprecated
public Writer(Configuration conf, FileSystem fs, String dirName,
              Class<? extends WritableComparable> keyClass, Class valClass,
              CompressionType compress, CompressionCodec codec,
              Progressable progress) throws IOException {
  this(conf,
       new Path(dirName),
       Writer.keyClass(keyClass),
       Writer.valueClass(valClass),
       Writer.compression(compress, codec),
       Writer.progressable(progress));
}
129
/**
 * Create the named map for keys of the named class, with a compression type.
 *
 * @param conf configuration handed to the option-based constructor
 * @param fs unused; this constructor never reads it
 * @param dirName name of the map directory, converted to a {@link Path}
 * @param keyClass class of the keys to be written
 * @param valClass class of the values to be written
 * @param compress compression type passed on as a writer option
 * @throws IOException if the option-based constructor fails
 * @deprecated Use
 *   {@link #Writer(Configuration, Path, SequenceFile.Writer.Option...)}
 *   instead.
 */
@Deprecated
public Writer(Configuration conf, FileSystem fs, String dirName,
              Class<? extends WritableComparable> keyClass, Class valClass,
              CompressionType compress) throws IOException {
  this(conf,
       new Path(dirName),
       Writer.keyClass(keyClass),
       Writer.valueClass(valClass),
       Writer.compression(compress));
}
140
/**
 * Create the named map using the named key comparator.
 *
 * @param conf configuration handed to the option-based constructor
 * @param fs unused; this constructor never reads it
 * @param dirName name of the map directory, converted to a {@link Path}
 * @param comparator comparator passed on as the comparator option
 * @param valClass class of the values to be written
 * @throws IOException if the option-based constructor fails
 * @deprecated Use
 *   {@link #Writer(Configuration, Path, SequenceFile.Writer.Option...)}
 *   instead.
 */
@Deprecated
public Writer(Configuration conf, FileSystem fs, String dirName,
              WritableComparator comparator, Class valClass
              ) throws IOException {
  this(conf,
       new Path(dirName),
       Writer.comparator(comparator),
       Writer.valueClass(valClass));
}
151
/**
 * Create the named map using the named key comparator and a compression
 * type.
 *
 * @param conf configuration handed to the option-based constructor
 * @param fs unused; this constructor never reads it
 * @param dirName name of the map directory, converted to a {@link Path}
 * @param comparator comparator passed on as the comparator option
 * @param valClass class of the values to be written
 * @param compress compression type passed on as a writer option
 * @throws IOException if the option-based constructor fails
 * @deprecated Use
 *   {@link #Writer(Configuration, Path, SequenceFile.Writer.Option...)}
 *   instead.
 */
@Deprecated
public Writer(Configuration conf, FileSystem fs, String dirName,
              WritableComparator comparator, Class valClass,
              CompressionType compress) throws IOException {
  this(conf,
       new Path(dirName),
       Writer.comparator(comparator),
       Writer.valueClass(valClass),
       Writer.compression(compress));
}
162
/**
 * Create the named map using the named key comparator, a compression type
 * and a progress callback.
 *
 * @param conf configuration handed to the option-based constructor
 * @param fs unused; this constructor never reads it
 * @param dirName name of the map directory, converted to a {@link Path}
 * @param comparator comparator passed on as the comparator option
 * @param valClass class of the values to be written
 * @param compress compression type passed on as a writer option
 * @param progress progress callback passed on as a writer option
 * @throws IOException if the option-based constructor fails
 * @deprecated Use
 *   {@link #Writer(Configuration, Path, SequenceFile.Writer.Option...)}
 *   instead.
 */
@Deprecated
public Writer(Configuration conf, FileSystem fs, String dirName,
              WritableComparator comparator, Class valClass,
              CompressionType compress,
              Progressable progress) throws IOException {
  this(conf,
       new Path(dirName),
       Writer.comparator(comparator),
       Writer.valueClass(valClass),
       Writer.compression(compress),
       Writer.progressable(progress));
}
175
/**
 * Create the named map using the named key comparator, a compression type,
 * a codec and a progress callback.
 *
 * @param conf configuration handed to the option-based constructor
 * @param fs unused; this constructor never reads it
 * @param dirName name of the map directory, converted to a {@link Path}
 * @param comparator comparator passed on as the comparator option
 * @param valClass class of the values to be written
 * @param compress compression type passed on as a writer option
 * @param codec compression codec passed on together with {@code compress}
 * @param progress progress callback passed on as a writer option
 * @throws IOException if the option-based constructor fails
 * @deprecated Use
 *   {@link #Writer(Configuration, Path, SequenceFile.Writer.Option...)}
 *   instead.
 */
@Deprecated
public Writer(Configuration conf, FileSystem fs, String dirName,
              WritableComparator comparator, Class valClass,
              CompressionType compress, CompressionCodec codec,
              Progressable progress) throws IOException {
  this(conf,
       new Path(dirName),
       Writer.comparator(comparator),
       Writer.valueClass(valClass),
       Writer.compression(compress, codec),
       Writer.progressable(progress));
}
188
189 // our options are a superset of sequence file writer options
190 public static interface Option extends SequenceFile.Writer.Option { }
191
192 private static class KeyClassOption extends Options.ClassOption
193 implements Option {
194 KeyClassOption(Class<?> value) {
195 super(value);
196 }
197 }
198
199 private static class ComparatorOption implements Option {
200 private final WritableComparator value;
201 ComparatorOption(WritableComparator value) {
202 this.value = value;
203 }
204 WritableComparator getValue() {
205 return value;
206 }
207 }
208
209 public static Option keyClass(Class<? extends WritableComparable> value) {
210 return new KeyClassOption(value);
211 }
212
213 public static Option comparator(WritableComparator value) {
214 return new ComparatorOption(value);
215 }
216
217 public static SequenceFile.Writer.Option valueClass(Class<?> value) {
218 return SequenceFile.Writer.valueClass(value);
219 }
220
221 public static
222 SequenceFile.Writer.Option compression(CompressionType type) {
223 return SequenceFile.Writer.compression(type);
224 }
225
226 public static
227 SequenceFile.Writer.Option compression(CompressionType type,
228 CompressionCodec codec) {
229 return SequenceFile.Writer.compression(type, codec);
230 }
231
232 public static SequenceFile.Writer.Option progressable(Progressable value) {
233 return SequenceFile.Writer.progressable(value);
234 }
235
236 @SuppressWarnings("unchecked")
237 public Writer(Configuration conf,
238 Path dirName,
239 SequenceFile.Writer.Option... opts
240 ) throws IOException {
241 KeyClassOption keyClassOption =
242 Options.getOption(KeyClassOption.class, opts);
243 ComparatorOption comparatorOption =
244 Options.getOption(ComparatorOption.class, opts);
245 if ((keyClassOption == null) == (comparatorOption == null)) {
246 throw new IllegalArgumentException("key class or comparator option "
247 + "must be set");
248 }
249 this.indexInterval = conf.getInt(INDEX_INTERVAL, this.indexInterval);
250
251 Class<? extends WritableComparable> keyClass;
252 if (keyClassOption == null) {
253 this.comparator = comparatorOption.getValue();
254 keyClass = comparator.getKeyClass();
255 } else {
256 keyClass=
257 (Class<? extends WritableComparable>) keyClassOption.getValue();
258 this.comparator = WritableComparator.get(keyClass);
259 }
260 this.lastKey = comparator.newKey();
261 FileSystem fs = dirName.getFileSystem(conf);
262
263 if (!fs.mkdirs(dirName)) {
264 throw new IOException("Mkdirs failed to create directory " + dirName);
265 }
266 Path dataFile = new Path(dirName, DATA_FILE_NAME);
267 Path indexFile = new Path(dirName, INDEX_FILE_NAME);
268
269 SequenceFile.Writer.Option[] dataOptions =
270 Options.prependOptions(opts,
271 SequenceFile.Writer.file(dataFile),
272 SequenceFile.Writer.keyClass(keyClass));
273 this.data = SequenceFile.createWriter(conf, dataOptions);
274
275 SequenceFile.Writer.Option[] indexOptions =
276 Options.prependOptions(opts, SequenceFile.Writer.file(indexFile),
277 SequenceFile.Writer.keyClass(keyClass),
278 SequenceFile.Writer.valueClass(LongWritable.class),
279 SequenceFile.Writer.compression(CompressionType.BLOCK));
280 this.index = SequenceFile.createWriter(conf, indexOptions);
281 }
282
283 /** The number of entries that are added before an index entry is added.*/
284 public int getIndexInterval() { return indexInterval; }
285
286 /** Sets the index interval.
287 * @see #getIndexInterval()
288 */
289 public void setIndexInterval(int interval) { indexInterval = interval; }
290
291 /** Sets the index interval and stores it in conf
292 * @see #getIndexInterval()
293 */
294 public static void setIndexInterval(Configuration conf, int interval) {
295 conf.setInt(INDEX_INTERVAL, interval);
296 }
297
298 /** Close the map. */
299 @Override
300 public synchronized void close() throws IOException {
301 data.close();
302 index.close();
303 }
304
305 /** Append a key/value pair to the map. The key must be greater or equal
306 * to the previous key added to the map. */
307 public synchronized void append(WritableComparable key, Writable val)
308 throws IOException {
309
310 checkKey(key);
311
312 long pos = data.getLength();
313 // Only write an index if we've changed positions. In a block compressed
314 // file, this means we write an entry at the start of each block
315 if (size >= lastIndexKeyCount + indexInterval && pos > lastIndexPos) {
316 position.set(pos); // point to current eof
317 index.append(key, position);
318 lastIndexPos = pos;
319 lastIndexKeyCount = size;
320 }
321
322 data.append(key, val); // append key/value to data
323 size++;
324 }
325
326 private void checkKey(WritableComparable key) throws IOException {
327 // check that keys are well-ordered
328 if (size != 0 && comparator.compare(lastKey, key) > 0)
329 throw new IOException("key out of order: "+key+" after "+lastKey);
330
331 // update lastKey with a copy of key by writing and reading
332 outBuf.reset();
333 key.write(outBuf); // write new key
334
335 inBuf.reset(outBuf.getData(), outBuf.getLength());
336 lastKey.readFields(inBuf); // read into lastKey
337 }
338
339 }
340
341 /** Provide access to an existing map. */
342 public static class Reader implements java.io.Closeable {
343
/** Number of index entries to skip between each entry.  Zero by default.
 * Setting this to values larger than zero can facilitate opening large map
 * files using less memory. Overwritten from the "io.map.index.skip"
 * configuration property by the option-based constructor. */
private int INDEX_SKIP = 0;

/** Comparator used to order keys during index validation and seeks. */
private WritableComparator comparator;

/** Key object that seeks read into; allocated on first seek. */
private WritableComparable nextKey;
/** Data-file position chosen by the most recent seek, or -1 before any seek. */
private long seekPosition = -1;
/** Index slot chosen by the most recent seek, or -1 if none / before first entry. */
private int seekIndex = -1;
/** Data-file position captured right after the data reader was opened. */
private long firstPosition;

// the data, on disk
private SequenceFile.Reader data;
private SequenceFile.Reader index;

// whether the index Reader was closed
private boolean indexClosed = false;

// the index, in memory
/** Number of in-memory index entries, or -1 before the index has been read. */
private int count = -1;
private WritableComparable[] keys;
private long[] positions;
367
368 /** Returns the class of keys in this file. */
369 public Class<?> getKeyClass() { return data.getKeyClass(); }
370
371 /** Returns the class of values in this file. */
372 public Class<?> getValueClass() { return data.getValueClass(); }
373
374 public static interface Option extends SequenceFile.Reader.Option {}
375
376 public static Option comparator(WritableComparator value) {
377 return new ComparatorOption(value);
378 }
379
380 static class ComparatorOption implements Option {
381 private final WritableComparator value;
382 ComparatorOption(WritableComparator value) {
383 this.value = value;
384 }
385 WritableComparator getValue() {
386 return value;
387 }
388 }
389
390 public Reader(Path dir, Configuration conf,
391 SequenceFile.Reader.Option... opts) throws IOException {
392 ComparatorOption comparatorOption =
393 Options.getOption(ComparatorOption.class, opts);
394 WritableComparator comparator =
395 comparatorOption == null ? null : comparatorOption.getValue();
396 INDEX_SKIP = conf.getInt("io.map.index.skip", 0);
397 open(dir, comparator, conf, opts);
398 }
399
/**
 * Construct a map reader for the named map.
 *
 * @param fs unused; this constructor never reads it
 * @param dirName name of the map directory, converted to a {@link Path}
 * @param conf configuration handed to the option-based constructor
 * @throws IOException if the map cannot be opened
 * @deprecated Use
 *   {@link #Reader(Path, Configuration, SequenceFile.Reader.Option...)}
 *   instead.
 */
@Deprecated
public Reader(FileSystem fs, String dirName,
              Configuration conf) throws IOException {
  this(new Path(dirName),
       conf);
}
408
/**
 * Construct a map reader for the named map using the named comparator.
 *
 * @param fs unused; this constructor never reads it
 * @param dirName name of the map directory, converted to a {@link Path}
 * @param comparator comparator passed on as the comparator option
 * @param conf configuration handed to the option-based constructor
 * @throws IOException if the map cannot be opened
 * @deprecated Use
 *   {@link #Reader(Path, Configuration, SequenceFile.Reader.Option...)}
 *   instead.
 */
@Deprecated
public Reader(FileSystem fs, String dirName, WritableComparator comparator,
              Configuration conf) throws IOException {
  this(new Path(dirName),
       conf,
       Reader.comparator(comparator));
}
417
418 protected synchronized void open(Path dir,
419 WritableComparator comparator,
420 Configuration conf,
421 SequenceFile.Reader.Option... options
422 ) throws IOException {
423 Path dataFile = new Path(dir, DATA_FILE_NAME);
424 Path indexFile = new Path(dir, INDEX_FILE_NAME);
425
426 // open the data
427 this.data = createDataFileReader(dataFile, conf, options);
428 this.firstPosition = data.getPosition();
429
430 if (comparator == null)
431 this.comparator =
432 WritableComparator.get(data.getKeyClass().
433 asSubclass(WritableComparable.class));
434 else
435 this.comparator = comparator;
436
437 // open the index
438 SequenceFile.Reader.Option[] indexOptions =
439 Options.prependOptions(options, SequenceFile.Reader.file(indexFile));
440 this.index = new SequenceFile.Reader(conf, indexOptions);
441 }
442
443 /**
444 * Override this method to specialize the type of
445 * {@link SequenceFile.Reader} returned.
446 */
447 protected SequenceFile.Reader
448 createDataFileReader(Path dataFile, Configuration conf,
449 SequenceFile.Reader.Option... options
450 ) throws IOException {
451 SequenceFile.Reader.Option[] newOptions =
452 Options.prependOptions(options, SequenceFile.Reader.file(dataFile));
453 return new SequenceFile.Reader(conf, newOptions);
454 }
455
456 private void readIndex() throws IOException {
457 // read the index entirely into memory
458 if (this.keys != null)
459 return;
460 this.count = 0;
461 this.positions = new long[1024];
462
463 try {
464 int skip = INDEX_SKIP;
465 LongWritable position = new LongWritable();
466 WritableComparable lastKey = null;
467 long lastIndex = -1;
468 ArrayList<WritableComparable> keyBuilder = new ArrayList<WritableComparable>(1024);
469 while (true) {
470 WritableComparable k = comparator.newKey();
471
472 if (!index.next(k, position))
473 break;
474
475 // check order to make sure comparator is compatible
476 if (lastKey != null && comparator.compare(lastKey, k) > 0)
477 throw new IOException("key out of order: "+k+" after "+lastKey);
478 lastKey = k;
479 if (skip > 0) {
480 skip--;
481 continue; // skip this entry
482 } else {
483 skip = INDEX_SKIP; // reset skip
484 }
485
486 // don't read an index that is the same as the previous one. Block
487 // compressed map files used to do this (multiple entries would point
488 // at the same block)
489 if (position.get() == lastIndex)
490 continue;
491
492 if (count == positions.length) {
493 positions = Arrays.copyOf(positions, positions.length * 2);
494 }
495
496 keyBuilder.add(k);
497 positions[count] = position.get();
498 count++;
499 }
500
501 this.keys = keyBuilder.toArray(new WritableComparable[count]);
502 positions = Arrays.copyOf(positions, count);
503 } catch (EOFException e) {
504 LOG.warn("Unexpected EOF reading " + index +
505 " at entry #" + count + ". Ignoring.");
506 } finally {
507 indexClosed = true;
508 index.close();
509 }
510 }
511
512 /** Re-positions the reader before its first key. */
513 public synchronized void reset() throws IOException {
514 data.seek(firstPosition);
515 }
516
517 /** Get the key at approximately the middle of the file. Or null if the
518 * file is empty.
519 */
520 public synchronized WritableComparable midKey() throws IOException {
521
522 readIndex();
523 if (count == 0) {
524 return null;
525 }
526
527 return keys[(count - 1) / 2];
528 }
529
530 /** Reads the final key from the file.
531 *
532 * @param key key to read into
533 */
534 public synchronized void finalKey(WritableComparable key)
535 throws IOException {
536
537 long originalPosition = data.getPosition(); // save position
538 try {
539 readIndex(); // make sure index is valid
540 if (count > 0) {
541 data.seek(positions[count-1]); // skip to last indexed entry
542 } else {
543 reset(); // start at the beginning
544 }
545 while (data.next(key)) {} // scan to eof
546
547 } finally {
548 data.seek(originalPosition); // restore position
549 }
550 }
551
552 /** Positions the reader at the named key, or if none such exists, at the
553 * first entry after the named key. Returns true iff the named key exists
554 * in this map.
555 */
556 public synchronized boolean seek(WritableComparable key) throws IOException {
557 return seekInternal(key) == 0;
558 }
559
560 /**
561 * Positions the reader at the named key, or if none such exists, at the
562 * first entry after the named key.
563 *
564 * @return 0 - exact match found
565 * < 0 - positioned at next record
566 * 1 - no more records in file
567 */
568 private synchronized int seekInternal(WritableComparable key)
569 throws IOException {
570 return seekInternal(key, false);
571 }
572
/**
 * Positions the reader at the named key, or if none such exists, at the
 * key that falls just before or just after dependent on how the
 * <code>before</code> parameter is set.
 *
 * <p>The in-memory index picks the data-file position to start from; the
 * data file is then scanned forward, reading each key into
 * <code>nextKey</code>, until a key at or beyond <code>key</code> is found
 * or the data is exhausted.
 *
 * @param key the key to look for
 * @param before - IF true, and <code>key</code> does not exist, position
 * file at entry that falls just before <code>key</code>.  Otherwise,
 * position file at record that sorts just after.
 * @return  0   - exact match found
 *          &lt; 0 - positioned at next record
 *          1   - no more records in file, or <code>before</code> is true
 *                and the reader was rewound to the record preceding
 *                <code>key</code>
 * @throws IOException if reading the index or the data fails
 */
private synchronized int seekInternal(WritableComparable key,
                                      final boolean before)
  throws IOException {
  readIndex();                                // make sure index is read

  // Fast path: reuse the index slot of the previous seek when key still
  // falls inside it, which saves the binary search. The scan below restarts
  // from seekPosition in either case.
  if (seekIndex != -1                         // seeked before
      && seekIndex+1 < count
      && comparator.compare(key, keys[seekIndex+1])<0 // before next indexed
      && comparator.compare(key, nextKey)
      >= 0) {                                 // but after last seeked
    // do nothing
  } else {
    seekIndex = binarySearch(key);
    if (seekIndex < 0)                        // decode insertion point
      seekIndex = -seekIndex-2;               // slot of the last key < key

    if (seekIndex == -1)                      // belongs before first entry
      seekPosition = firstPosition;           // use beginning of file
    else
      seekPosition = positions[seekIndex];    // else use index
  }
  data.seek(seekPosition);

  if (nextKey == null)
    nextKey = comparator.newKey();

  // If we're looking for the key before, we need to keep track
  // of the position we got the current key as well as the position
  // of the key before it.
  long prevPosition = -1;
  long curPosition = seekPosition;

  while (data.next(nextKey)) {
    int c = comparator.compare(key, nextKey);
    if (c <= 0) {                             // at or beyond desired
      if (before && c != 0) {
        if (prevPosition == -1) {
          // We're on the first record of this index block
          // and we've already passed the search key. Therefore
          // we must be at the beginning of the file, so seek
          // to the beginning of this block and return c
          data.seek(curPosition);
        } else {
          // We have a previous record to back up to
          data.seek(prevPosition);
          data.next(nextKey);                 // re-read so nextKey holds that record's key
          // now that we've rewound, the search key must be greater than this key
          return 1;
        }
      }
      return c;
    }
    if (before) {
      // shift the window: the record just read becomes the previous one
      prevPosition = curPosition;
      curPosition = data.getPosition();
    }
  }

  return 1;                                   // ran off the end of the data
}
645
646 private int binarySearch(WritableComparable key) {
647 int low = 0;
648 int high = count-1;
649
650 while (low <= high) {
651 int mid = (low + high) >>> 1;
652 WritableComparable midVal = keys[mid];
653 int cmp = comparator.compare(midVal, key);
654
655 if (cmp < 0)
656 low = mid + 1;
657 else if (cmp > 0)
658 high = mid - 1;
659 else
660 return mid; // key found
661 }
662 return -(low + 1); // key not found.
663 }
664
665 /** Read the next key/value pair in the map into <code>key</code> and
666 * <code>val</code>. Returns true if such a pair exists and false when at
667 * the end of the map */
668 public synchronized boolean next(WritableComparable key, Writable val)
669 throws IOException {
670 return data.next(key, val);
671 }
672
673 /** Return the value for the named key, or null if none exists. */
674 public synchronized Writable get(WritableComparable key, Writable val)
675 throws IOException {
676 if (seek(key)) {
677 data.getCurrentValue(val);
678 return val;
679 } else
680 return null;
681 }
682
683 /**
684 * Finds the record that is the closest match to the specified key.
685 * Returns <code>key</code> or if it does not exist, at the first entry
686 * after the named key.
687 *
688 - * @param key - key that we're trying to find
689 - * @param val - data value if key is found
690 - * @return - the key that was the closest match or null if eof.
691 */
692 public synchronized WritableComparable getClosest(WritableComparable key,
693 Writable val)
694 throws IOException {
695 return getClosest(key, val, false);
696 }
697
698 /**
699 * Finds the record that is the closest match to the specified key.
700 *
701 * @param key - key that we're trying to find
702 * @param val - data value if key is found
703 * @param before - IF true, and <code>key</code> does not exist, return
704 * the first entry that falls just before the <code>key</code>. Otherwise,
705 * return the record that sorts just after.
706 * @return - the key that was the closest match or null if eof.
707 */
708 public synchronized WritableComparable getClosest(WritableComparable key,
709 Writable val, final boolean before)
710 throws IOException {
711
712 int c = seekInternal(key, before);
713
714 // If we didn't get an exact match, and we ended up in the wrong
715 // direction relative to the query key, return null since we
716 // must be at the beginning or end of the file.
717 if ((!before && c > 0) ||
718 (before && c < 0)) {
719 return null;
720 }
721
722 data.getCurrentValue(val);
723 return nextKey;
724 }
725
726 /** Close the map. */
727 @Override
728 public synchronized void close() throws IOException {
729 if (!indexClosed) {
730 index.close();
731 }
732 data.close();
733 }
734
735 }
736
737 /** Renames an existing map directory. */
738 public static void rename(FileSystem fs, String oldName, String newName)
739 throws IOException {
740 Path oldDir = new Path(oldName);
741 Path newDir = new Path(newName);
742 if (!fs.rename(oldDir, newDir)) {
743 throw new IOException("Could not rename " + oldDir + " to " + newDir);
744 }
745 }
746
747 /** Deletes the named map file. */
748 public static void delete(FileSystem fs, String name) throws IOException {
749 Path dir = new Path(name);
750 Path data = new Path(dir, DATA_FILE_NAME);
751 Path index = new Path(dir, INDEX_FILE_NAME);
752
753 fs.delete(data, true);
754 fs.delete(index, true);
755 fs.delete(dir, true);
756 }
757
758 /**
759 * This method attempts to fix a corrupt MapFile by re-creating its index.
760 * @param fs filesystem
761 * @param dir directory containing the MapFile data and index
762 * @param keyClass key class (has to be a subclass of Writable)
763 * @param valueClass value class (has to be a subclass of Writable)
764 * @param dryrun do not perform any changes, just report what needs to be done
765 * @return number of valid entries in this MapFile, or -1 if no fixing was needed
766 * @throws Exception
767 */
768 public static long fix(FileSystem fs, Path dir,
769 Class<? extends Writable> keyClass,
770 Class<? extends Writable> valueClass, boolean dryrun,
771 Configuration conf) throws Exception {
772 String dr = (dryrun ? "[DRY RUN ] " : "");
773 Path data = new Path(dir, DATA_FILE_NAME);
774 Path index = new Path(dir, INDEX_FILE_NAME);
775 int indexInterval = conf.getInt(Writer.INDEX_INTERVAL, 128);
776 if (!fs.exists(data)) {
777 // there's nothing we can do to fix this!
778 throw new Exception(dr + "Missing data file in " + dir + ", impossible to fix this.");
779 }
780 if (fs.exists(index)) {
781 // no fixing needed
782 return -1;
783 }
784 SequenceFile.Reader dataReader =
785 new SequenceFile.Reader(conf, SequenceFile.Reader.file(data));
786 if (!dataReader.getKeyClass().equals(keyClass)) {
787 throw new Exception(dr + "Wrong key class in " + dir + ", expected" + keyClass.getName() +
788 ", got " + dataReader.getKeyClass().getName());
789 }
790 if (!dataReader.getValueClass().equals(valueClass)) {
791 throw new Exception(dr + "Wrong value class in " + dir + ", expected" + valueClass.getName() +
792 ", got " + dataReader.getValueClass().getName());
793 }
794 long cnt = 0L;
795 Writable key = ReflectionUtils.newInstance(keyClass, conf);
796 Writable value = ReflectionUtils.newInstance(valueClass, conf);
797 SequenceFile.Writer indexWriter = null;
798 if (!dryrun) {
799 indexWriter =
800 SequenceFile.createWriter(conf,
801 SequenceFile.Writer.file(index),
802 SequenceFile.Writer.keyClass(keyClass),
803 SequenceFile.Writer.valueClass
804 (LongWritable.class));
805 }
806 try {
807 long pos = 0L;
808 LongWritable position = new LongWritable();
809 while(dataReader.next(key, value)) {
810 cnt++;
811 if (cnt % indexInterval == 0) {
812 position.set(pos);
813 if (!dryrun) indexWriter.append(key, position);
814 }
815 pos = dataReader.getPosition();
816 }
817 } catch(Throwable t) {
818 // truncated data file. swallow it.
819 }
820 dataReader.close();
821 if (!dryrun) indexWriter.close();
822 return cnt;
823 }
824
825
826 public static void main(String[] args) throws Exception {
827 String usage = "Usage: MapFile inFile outFile";
828
829 if (args.length != 2) {
830 System.err.println(usage);
831 System.exit(-1);
832 }
833
834 String in = args[0];
835 String out = args[1];
836
837 Configuration conf = new Configuration();
838 FileSystem fs = FileSystem.getLocal(conf);
839 MapFile.Reader reader = new MapFile.Reader(fs, in, conf);
840 MapFile.Writer writer =
841 new MapFile.Writer(conf, fs, out,
842 reader.getKeyClass().asSubclass(WritableComparable.class),
843 reader.getValueClass());
844
845 WritableComparable key =
846 ReflectionUtils.newInstance(reader.getKeyClass().asSubclass(WritableComparable.class), conf);
847 Writable value =
848 ReflectionUtils.newInstance(reader.getValueClass().asSubclass(Writable.class), conf);
849
850 while (reader.next(key, value)) // copy all entries
851 writer.append(key, value);
852
853 writer.close();
854 }
855
856 }