001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.io;
020
021import java.io.EOFException;
022import java.io.IOException;
023import java.util.ArrayList;
024import java.util.Arrays;
025
026import org.apache.commons.logging.Log;
027import org.apache.commons.logging.LogFactory;
028import org.apache.hadoop.classification.InterfaceAudience;
029import org.apache.hadoop.classification.InterfaceStability;
030import org.apache.hadoop.conf.Configuration;
031import org.apache.hadoop.fs.FileSystem;
032import org.apache.hadoop.fs.Path;
033import org.apache.hadoop.io.SequenceFile.CompressionType;
034import org.apache.hadoop.io.compress.CompressionCodec;
035import org.apache.hadoop.util.Options;
036import org.apache.hadoop.util.Progressable;
037import org.apache.hadoop.util.ReflectionUtils;
038
039/** A file-based map from keys to values.
040 * 
041 * <p>A map is a directory containing two files, the <code>data</code> file,
042 * containing all keys and values in the map, and a smaller <code>index</code>
043 * file, containing a fraction of the keys.  The fraction is determined by
044 * {@link Writer#getIndexInterval()}.
045 *
046 * <p>The index file is read entirely into memory.  Thus key implementations
047 * should try to keep themselves small.
048 *
049 * <p>Map files are created by adding entries in-order.  To maintain a large
050 * database, perform updates by copying the previous version of a database and
051 * merging in a sorted change list, to create a new version of the database in
052 * a new file.  Sorting large change lists can be done with {@link
053 * SequenceFile.Sorter}.
054 */
055@InterfaceAudience.Public
056@InterfaceStability.Stable
057public class MapFile {
058  private static final Log LOG = LogFactory.getLog(MapFile.class);
059
060  /** The name of the index file. */
061  public static final String INDEX_FILE_NAME = "index";
062
063  /** The name of the data file. */
064  public static final String DATA_FILE_NAME = "data";
065
066  protected MapFile() {}                          // no public ctor
067
068  /** Writes a new map. */
069  public static class Writer implements java.io.Closeable {
070    private SequenceFile.Writer data;
071    private SequenceFile.Writer index;
072
073    final private static String INDEX_INTERVAL = "io.map.index.interval";
074    private int indexInterval = 128;
075
076    private long size;
077    private LongWritable position = new LongWritable();
078
079    // the following fields are used only for checking key order
080    private WritableComparator comparator;
081    private DataInputBuffer inBuf = new DataInputBuffer();
082    private DataOutputBuffer outBuf = new DataOutputBuffer();
083    private WritableComparable lastKey;
084
085    /** What's the position (in bytes) we wrote when we got the last index */
086    private long lastIndexPos = -1;
087
088    /**
089     * What was size when we last wrote an index. Set to MIN_VALUE to ensure that
090     * we have an index at position zero -- midKey will throw an exception if this
091     * is not the case
092     */
093    private long lastIndexKeyCount = Long.MIN_VALUE;
094
095
096    /** Create the named map for keys of the named class. 
097     * @deprecated Use Writer(Configuration, Path, Option...) instead.
098     */
099    @Deprecated
100    public Writer(Configuration conf, FileSystem fs, String dirName,
101                  Class<? extends WritableComparable> keyClass, 
102                  Class valClass) throws IOException {
103      this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass));
104    }
105
106    /** Create the named map for keys of the named class. 
107     * @deprecated Use Writer(Configuration, Path, Option...) instead.
108     */
109    @Deprecated
110    public Writer(Configuration conf, FileSystem fs, String dirName,
111                  Class<? extends WritableComparable> keyClass, Class valClass,
112                  CompressionType compress, 
113                  Progressable progress) throws IOException {
114      this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass),
115           compression(compress), progressable(progress));
116    }
117
118    /** Create the named map for keys of the named class. 
119     * @deprecated Use Writer(Configuration, Path, Option...) instead.
120     */
121    @Deprecated
122    public Writer(Configuration conf, FileSystem fs, String dirName,
123                  Class<? extends WritableComparable> keyClass, Class valClass,
124                  CompressionType compress, CompressionCodec codec,
125                  Progressable progress) throws IOException {
126      this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass),
127           compression(compress, codec), progressable(progress));
128    }
129
130    /** Create the named map for keys of the named class. 
131     * @deprecated Use Writer(Configuration, Path, Option...) instead.
132     */
133    @Deprecated
134    public Writer(Configuration conf, FileSystem fs, String dirName,
135                  Class<? extends WritableComparable> keyClass, Class valClass,
136                  CompressionType compress) throws IOException {
137      this(conf, new Path(dirName), keyClass(keyClass),
138           valueClass(valClass), compression(compress));
139    }
140
141    /** Create the named map using the named key comparator. 
142     * @deprecated Use Writer(Configuration, Path, Option...) instead.
143     */
144    @Deprecated
145    public Writer(Configuration conf, FileSystem fs, String dirName,
146                  WritableComparator comparator, Class valClass
147                  ) throws IOException {
148      this(conf, new Path(dirName), comparator(comparator), 
149           valueClass(valClass));
150    }
151
152    /** Create the named map using the named key comparator. 
153     * @deprecated Use Writer(Configuration, Path, Option...) instead.
154     */
155    @Deprecated
156    public Writer(Configuration conf, FileSystem fs, String dirName,
157                  WritableComparator comparator, Class valClass,
158                  SequenceFile.CompressionType compress) throws IOException {
159      this(conf, new Path(dirName), comparator(comparator),
160           valueClass(valClass), compression(compress));
161    }
162
163    /** Create the named map using the named key comparator. 
164     * @deprecated Use Writer(Configuration, Path, Option...)} instead.
165     */
166    @Deprecated
167    public Writer(Configuration conf, FileSystem fs, String dirName,
168                  WritableComparator comparator, Class valClass,
169                  SequenceFile.CompressionType compress,
170                  Progressable progress) throws IOException {
171      this(conf, new Path(dirName), comparator(comparator),
172           valueClass(valClass), compression(compress),
173           progressable(progress));
174    }
175
176    /** Create the named map using the named key comparator. 
177     * @deprecated Use Writer(Configuration, Path, Option...) instead.
178     */
179    @Deprecated
180    public Writer(Configuration conf, FileSystem fs, String dirName,
181                  WritableComparator comparator, Class valClass,
182                  SequenceFile.CompressionType compress, CompressionCodec codec,
183                  Progressable progress) throws IOException {
184      this(conf, new Path(dirName), comparator(comparator),
185           valueClass(valClass), compression(compress, codec),
186           progressable(progress));
187    }
188    
189    // our options are a superset of sequence file writer options
190    public static interface Option extends SequenceFile.Writer.Option { }
191    
192    private static class KeyClassOption extends Options.ClassOption
193                                        implements Option {
194      KeyClassOption(Class<?> value) {
195        super(value);
196      }
197    }
198    
199    private static class ComparatorOption implements Option {
200      private final WritableComparator value;
201      ComparatorOption(WritableComparator value) {
202        this.value = value;
203      }
204      WritableComparator getValue() {
205        return value;
206      }
207    }
208
209    public static Option keyClass(Class<? extends WritableComparable> value) {
210      return new KeyClassOption(value);
211    }
212    
213    public static Option comparator(WritableComparator value) {
214      return new ComparatorOption(value);
215    }
216
217    public static SequenceFile.Writer.Option valueClass(Class<?> value) {
218      return SequenceFile.Writer.valueClass(value);
219    }
220    
221    public static 
222    SequenceFile.Writer.Option compression(CompressionType type) {
223      return SequenceFile.Writer.compression(type);
224    }
225
226    public static 
227    SequenceFile.Writer.Option compression(CompressionType type,
228        CompressionCodec codec) {
229      return SequenceFile.Writer.compression(type, codec);
230    }
231
232    public static SequenceFile.Writer.Option progressable(Progressable value) {
233      return SequenceFile.Writer.progressable(value);
234    }
235
236    @SuppressWarnings("unchecked")
237    public Writer(Configuration conf, 
238                  Path dirName,
239                  SequenceFile.Writer.Option... opts
240                  ) throws IOException {
241      KeyClassOption keyClassOption = 
242        Options.getOption(KeyClassOption.class, opts);
243      ComparatorOption comparatorOption =
244        Options.getOption(ComparatorOption.class, opts);
245      if ((keyClassOption == null) == (comparatorOption == null)) {
246        throw new IllegalArgumentException("key class or comparator option "
247                                           + "must be set");
248      }
249      this.indexInterval = conf.getInt(INDEX_INTERVAL, this.indexInterval);
250
251      Class<? extends WritableComparable> keyClass;
252      if (keyClassOption == null) {
253        this.comparator = comparatorOption.getValue();
254        keyClass = comparator.getKeyClass();
255      } else {
256        keyClass= 
257          (Class<? extends WritableComparable>) keyClassOption.getValue();
258        this.comparator = WritableComparator.get(keyClass);
259      }
260      this.lastKey = comparator.newKey();
261      FileSystem fs = dirName.getFileSystem(conf);
262
263      if (!fs.mkdirs(dirName)) {
264        throw new IOException("Mkdirs failed to create directory " + dirName);
265      }
266      Path dataFile = new Path(dirName, DATA_FILE_NAME);
267      Path indexFile = new Path(dirName, INDEX_FILE_NAME);
268
269      SequenceFile.Writer.Option[] dataOptions =
270        Options.prependOptions(opts, 
271                               SequenceFile.Writer.file(dataFile),
272                               SequenceFile.Writer.keyClass(keyClass));
273      this.data = SequenceFile.createWriter(conf, dataOptions);
274
275      SequenceFile.Writer.Option[] indexOptions =
276        Options.prependOptions(opts, SequenceFile.Writer.file(indexFile),
277            SequenceFile.Writer.keyClass(keyClass),
278            SequenceFile.Writer.valueClass(LongWritable.class),
279            SequenceFile.Writer.compression(CompressionType.BLOCK));
280      this.index = SequenceFile.createWriter(conf, indexOptions);      
281    }
282
283    /** The number of entries that are added before an index entry is added.*/
284    public int getIndexInterval() { return indexInterval; }
285
286    /** Sets the index interval.
287     * @see #getIndexInterval()
288     */
289    public void setIndexInterval(int interval) { indexInterval = interval; }
290
291    /** Sets the index interval and stores it in conf
292     * @see #getIndexInterval()
293     */
294    public static void setIndexInterval(Configuration conf, int interval) {
295      conf.setInt(INDEX_INTERVAL, interval);
296    }
297
298    /** Close the map. */
299    @Override
300    public synchronized void close() throws IOException {
301      data.close();
302      index.close();
303    }
304
305    /** Append a key/value pair to the map.  The key must be greater or equal
306     * to the previous key added to the map. */
307    public synchronized void append(WritableComparable key, Writable val)
308      throws IOException {
309
310      checkKey(key);
311
312      long pos = data.getLength();      
313      // Only write an index if we've changed positions. In a block compressed
314      // file, this means we write an entry at the start of each block      
315      if (size >= lastIndexKeyCount + indexInterval && pos > lastIndexPos) {
316        position.set(pos);                        // point to current eof
317        index.append(key, position);
318        lastIndexPos = pos;
319        lastIndexKeyCount = size;
320      }
321
322      data.append(key, val);                      // append key/value to data
323      size++;
324    }
325
326    private void checkKey(WritableComparable key) throws IOException {
327      // check that keys are well-ordered
328      if (size != 0 && comparator.compare(lastKey, key) > 0)
329        throw new IOException("key out of order: "+key+" after "+lastKey);
330          
331      // update lastKey with a copy of key by writing and reading
332      outBuf.reset();
333      key.write(outBuf);                          // write new key
334
335      inBuf.reset(outBuf.getData(), outBuf.getLength());
336      lastKey.readFields(inBuf);                  // read into lastKey
337    }
338
339  }
340  
341  /** Provide access to an existing map. */
342  public static class Reader implements java.io.Closeable {
343      
344    /** Number of index entries to skip between each entry.  Zero by default.
345     * Setting this to values larger than zero can facilitate opening large map
346     * files using less memory. */
347    private int INDEX_SKIP = 0;
348      
349    private WritableComparator comparator;
350
351    private WritableComparable nextKey;
352    private long seekPosition = -1;
353    private int seekIndex = -1;
354    private long firstPosition;
355
356    // the data, on disk
357    private SequenceFile.Reader data;
358    private SequenceFile.Reader index;
359
360    // whether the index Reader was closed
361    private boolean indexClosed = false;
362
363    // the index, in memory
364    private int count = -1;
365    private WritableComparable[] keys;
366    private long[] positions;
367
368    /** Returns the class of keys in this file. */
369    public Class<?> getKeyClass() { return data.getKeyClass(); }
370
371    /** Returns the class of values in this file. */
372    public Class<?> getValueClass() { return data.getValueClass(); }
373
374    public static interface Option extends SequenceFile.Reader.Option {}
375    
376    public static Option comparator(WritableComparator value) {
377      return new ComparatorOption(value);
378    }
379
380    static class ComparatorOption implements Option {
381      private final WritableComparator value;
382      ComparatorOption(WritableComparator value) {
383        this.value = value;
384      }
385      WritableComparator getValue() {
386        return value;
387      }
388    }
389
390    public Reader(Path dir, Configuration conf,
391                  SequenceFile.Reader.Option... opts) throws IOException {
392      ComparatorOption comparatorOption = 
393        Options.getOption(ComparatorOption.class, opts);
394      WritableComparator comparator =
395        comparatorOption == null ? null : comparatorOption.getValue();
396      INDEX_SKIP = conf.getInt("io.map.index.skip", 0);
397      open(dir, comparator, conf, opts);
398    }
399 
400    /** Construct a map reader for the named map.
401     * @deprecated
402     */
403    @Deprecated
404    public Reader(FileSystem fs, String dirName, 
405                  Configuration conf) throws IOException {
406      this(new Path(dirName), conf);
407    }
408
409    /** Construct a map reader for the named map using the named comparator.
410     * @deprecated
411     */
412    @Deprecated
413    public Reader(FileSystem fs, String dirName, WritableComparator comparator, 
414                  Configuration conf) throws IOException {
415      this(new Path(dirName), conf, comparator(comparator));
416    }
417    
418    protected synchronized void open(Path dir,
419                                     WritableComparator comparator,
420                                     Configuration conf, 
421                                     SequenceFile.Reader.Option... options
422                                     ) throws IOException {
423      Path dataFile = new Path(dir, DATA_FILE_NAME);
424      Path indexFile = new Path(dir, INDEX_FILE_NAME);
425
426      // open the data
427      this.data = createDataFileReader(dataFile, conf, options);
428      this.firstPosition = data.getPosition();
429
430      if (comparator == null)
431        this.comparator = 
432          WritableComparator.get(data.getKeyClass().
433                                   asSubclass(WritableComparable.class));
434      else
435        this.comparator = comparator;
436
437      // open the index
438      SequenceFile.Reader.Option[] indexOptions =
439        Options.prependOptions(options, SequenceFile.Reader.file(indexFile));
440      this.index = new SequenceFile.Reader(conf, indexOptions);
441    }
442
443    /**
444     * Override this method to specialize the type of
445     * {@link SequenceFile.Reader} returned.
446     */
447    protected SequenceFile.Reader 
448      createDataFileReader(Path dataFile, Configuration conf,
449                           SequenceFile.Reader.Option... options
450                           ) throws IOException {
451      SequenceFile.Reader.Option[] newOptions =
452        Options.prependOptions(options, SequenceFile.Reader.file(dataFile));
453      return new SequenceFile.Reader(conf, newOptions);
454    }
455
456    private void readIndex() throws IOException {
457      // read the index entirely into memory
458      if (this.keys != null)
459        return;
460      this.count = 0;
461      this.positions = new long[1024];
462
463      try {
464        int skip = INDEX_SKIP;
465        LongWritable position = new LongWritable();
466        WritableComparable lastKey = null;
467        long lastIndex = -1;
468        ArrayList<WritableComparable> keyBuilder = new ArrayList<WritableComparable>(1024);
469        while (true) {
470          WritableComparable k = comparator.newKey();
471
472          if (!index.next(k, position))
473            break;
474
475          // check order to make sure comparator is compatible
476          if (lastKey != null && comparator.compare(lastKey, k) > 0)
477            throw new IOException("key out of order: "+k+" after "+lastKey);
478          lastKey = k;
479          if (skip > 0) {
480            skip--;
481            continue;                             // skip this entry
482          } else {
483            skip = INDEX_SKIP;                    // reset skip
484          }
485
486          // don't read an index that is the same as the previous one. Block
487          // compressed map files used to do this (multiple entries would point
488          // at the same block)
489          if (position.get() == lastIndex)
490            continue;
491
492          if (count == positions.length) {
493            positions = Arrays.copyOf(positions, positions.length * 2);
494          }
495
496          keyBuilder.add(k);
497          positions[count] = position.get();
498          count++;
499        }
500
501        this.keys = keyBuilder.toArray(new WritableComparable[count]);
502        positions = Arrays.copyOf(positions, count);
503      } catch (EOFException e) {
504        LOG.warn("Unexpected EOF reading " + index +
505                              " at entry #" + count + ".  Ignoring.");
506      } finally {
507        indexClosed = true;
508        index.close();
509      }
510    }
511
512    /** Re-positions the reader before its first key. */
513    public synchronized void reset() throws IOException {
514      data.seek(firstPosition);
515    }
516
517    /** Get the key at approximately the middle of the file. Or null if the
518     *  file is empty. 
519     */
520    public synchronized WritableComparable midKey() throws IOException {
521
522      readIndex();
523      if (count == 0) {
524        return null;
525      }
526    
527      return keys[(count - 1) / 2];
528    }
529    
530    /** Reads the final key from the file.
531     *
532     * @param key key to read into
533     */
534    public synchronized void finalKey(WritableComparable key)
535      throws IOException {
536
537      long originalPosition = data.getPosition(); // save position
538      try {
539        readIndex();                              // make sure index is valid
540        if (count > 0) {
541          data.seek(positions[count-1]);          // skip to last indexed entry
542        } else {
543          reset();                                // start at the beginning
544        }
545        while (data.next(key)) {}                 // scan to eof
546
547      } finally {
548        data.seek(originalPosition);              // restore position
549      }
550    }
551
552    /** Positions the reader at the named key, or if none such exists, at the
553     * first entry after the named key.  Returns true iff the named key exists
554     * in this map.
555     */
556    public synchronized boolean seek(WritableComparable key) throws IOException {
557      return seekInternal(key) == 0;
558    }
559
560    /** 
561     * Positions the reader at the named key, or if none such exists, at the
562     * first entry after the named key.
563     *
564     * @return  0   - exact match found
565     *          < 0 - positioned at next record
566     *          1   - no more records in file
567     */
568    private synchronized int seekInternal(WritableComparable key)
569      throws IOException {
570      return seekInternal(key, false);
571    }
572
573    /** 
574     * Positions the reader at the named key, or if none such exists, at the
575     * key that falls just before or just after dependent on how the
576     * <code>before</code> parameter is set.
577     * 
578     * @param before - IF true, and <code>key</code> does not exist, position
579     * file at entry that falls just before <code>key</code>.  Otherwise,
580     * position file at record that sorts just after.
581     * @return  0   - exact match found
582     *          < 0 - positioned at next record
583     *          1   - no more records in file
584     */
585    private synchronized int seekInternal(WritableComparable key,
586        final boolean before)
587      throws IOException {
588      readIndex();                                // make sure index is read
589
590      if (seekIndex != -1                         // seeked before
591          && seekIndex+1 < count           
592          && comparator.compare(key, keys[seekIndex+1])<0 // before next indexed
593          && comparator.compare(key, nextKey)
594          >= 0) {                                 // but after last seeked
595        // do nothing
596      } else {
597        seekIndex = binarySearch(key);
598        if (seekIndex < 0)                        // decode insertion point
599          seekIndex = -seekIndex-2;
600
601        if (seekIndex == -1)                      // belongs before first entry
602          seekPosition = firstPosition;           // use beginning of file
603        else
604          seekPosition = positions[seekIndex];    // else use index
605      }
606      data.seek(seekPosition);
607      
608      if (nextKey == null)
609        nextKey = comparator.newKey();
610     
611      // If we're looking for the key before, we need to keep track
612      // of the position we got the current key as well as the position
613      // of the key before it.
614      long prevPosition = -1;
615      long curPosition = seekPosition;
616
617      while (data.next(nextKey)) {
618        int c = comparator.compare(key, nextKey);
619        if (c <= 0) {                             // at or beyond desired
620          if (before && c != 0) {
621            if (prevPosition == -1) {
622              // We're on the first record of this index block
623              // and we've already passed the search key. Therefore
624              // we must be at the beginning of the file, so seek
625              // to the beginning of this block and return c
626              data.seek(curPosition);
627            } else {
628              // We have a previous record to back up to
629              data.seek(prevPosition);
630              data.next(nextKey);
631              // now that we've rewound, the search key must be greater than this key
632              return 1;
633            }
634          }
635          return c;
636        }
637        if (before) {
638          prevPosition = curPosition;
639          curPosition = data.getPosition();
640        }
641      }
642
643      return 1;
644    }
645
646    private int binarySearch(WritableComparable key) {
647      int low = 0;
648      int high = count-1;
649
650      while (low <= high) {
651        int mid = (low + high) >>> 1;
652        WritableComparable midVal = keys[mid];
653        int cmp = comparator.compare(midVal, key);
654
655        if (cmp < 0)
656          low = mid + 1;
657        else if (cmp > 0)
658          high = mid - 1;
659        else
660          return mid;                             // key found
661      }
662      return -(low + 1);                          // key not found.
663    }
664
665    /** Read the next key/value pair in the map into <code>key</code> and
666     * <code>val</code>.  Returns true if such a pair exists and false when at
667     * the end of the map */
668    public synchronized boolean next(WritableComparable key, Writable val)
669      throws IOException {
670      return data.next(key, val);
671    }
672
673    /** Return the value for the named key, or null if none exists. */
674    public synchronized Writable get(WritableComparable key, Writable val)
675      throws IOException {
676      if (seek(key)) {
677        data.getCurrentValue(val);
678        return val;
679      } else
680        return null;
681    }
682
683    /** 
684     * Finds the record that is the closest match to the specified key.
685     * Returns <code>key</code> or if it does not exist, at the first entry
686     * after the named key.
687     * 
688-     * @param key       - key that we're trying to find
689-     * @param val       - data value if key is found
690-     * @return          - the key that was the closest match or null if eof.
691     */
692    public synchronized WritableComparable getClosest(WritableComparable key,
693      Writable val)
694    throws IOException {
695      return getClosest(key, val, false);
696    }
697
698    /** 
699     * Finds the record that is the closest match to the specified key.
700     * 
701     * @param key       - key that we're trying to find
702     * @param val       - data value if key is found
703     * @param before    - IF true, and <code>key</code> does not exist, return
704     * the first entry that falls just before the <code>key</code>.  Otherwise,
705     * return the record that sorts just after.
706     * @return          - the key that was the closest match or null if eof.
707     */
708    public synchronized WritableComparable getClosest(WritableComparable key,
709        Writable val, final boolean before)
710      throws IOException {
711     
712      int c = seekInternal(key, before);
713
714      // If we didn't get an exact match, and we ended up in the wrong
715      // direction relative to the query key, return null since we
716      // must be at the beginning or end of the file.
717      if ((!before && c > 0) ||
718          (before && c < 0)) {
719        return null;
720      }
721
722      data.getCurrentValue(val);
723      return nextKey;
724    }
725
726    /** Close the map. */
727    @Override
728    public synchronized void close() throws IOException {
729      if (!indexClosed) {
730        index.close();
731      }
732      data.close();
733    }
734
735  }
736
737  /** Renames an existing map directory. */
738  public static void rename(FileSystem fs, String oldName, String newName)
739    throws IOException {
740    Path oldDir = new Path(oldName);
741    Path newDir = new Path(newName);
742    if (!fs.rename(oldDir, newDir)) {
743      throw new IOException("Could not rename " + oldDir + " to " + newDir);
744    }
745  }
746
747  /** Deletes the named map file. */
748  public static void delete(FileSystem fs, String name) throws IOException {
749    Path dir = new Path(name);
750    Path data = new Path(dir, DATA_FILE_NAME);
751    Path index = new Path(dir, INDEX_FILE_NAME);
752
753    fs.delete(data, true);
754    fs.delete(index, true);
755    fs.delete(dir, true);
756  }
757
758  /**
759   * This method attempts to fix a corrupt MapFile by re-creating its index.
760   * @param fs filesystem
761   * @param dir directory containing the MapFile data and index
762   * @param keyClass key class (has to be a subclass of Writable)
763   * @param valueClass value class (has to be a subclass of Writable)
764   * @param dryrun do not perform any changes, just report what needs to be done
765   * @return number of valid entries in this MapFile, or -1 if no fixing was needed
766   * @throws Exception
767   */
768  public static long fix(FileSystem fs, Path dir,
769                         Class<? extends Writable> keyClass,
770                         Class<? extends Writable> valueClass, boolean dryrun,
771                         Configuration conf) throws Exception {
772    String dr = (dryrun ? "[DRY RUN ] " : "");
773    Path data = new Path(dir, DATA_FILE_NAME);
774    Path index = new Path(dir, INDEX_FILE_NAME);
775    int indexInterval = conf.getInt(Writer.INDEX_INTERVAL, 128);
776    if (!fs.exists(data)) {
777      // there's nothing we can do to fix this!
778      throw new Exception(dr + "Missing data file in " + dir + ", impossible to fix this.");
779    }
780    if (fs.exists(index)) {
781      // no fixing needed
782      return -1;
783    }
784    SequenceFile.Reader dataReader = 
785      new SequenceFile.Reader(conf, SequenceFile.Reader.file(data));
786    if (!dataReader.getKeyClass().equals(keyClass)) {
787      throw new Exception(dr + "Wrong key class in " + dir + ", expected" + keyClass.getName() +
788                          ", got " + dataReader.getKeyClass().getName());
789    }
790    if (!dataReader.getValueClass().equals(valueClass)) {
791      throw new Exception(dr + "Wrong value class in " + dir + ", expected" + valueClass.getName() +
792                          ", got " + dataReader.getValueClass().getName());
793    }
794    long cnt = 0L;
795    Writable key = ReflectionUtils.newInstance(keyClass, conf);
796    Writable value = ReflectionUtils.newInstance(valueClass, conf);
797    SequenceFile.Writer indexWriter = null;
798    if (!dryrun) {
799      indexWriter = 
800        SequenceFile.createWriter(conf, 
801                                  SequenceFile.Writer.file(index), 
802                                  SequenceFile.Writer.keyClass(keyClass), 
803                                  SequenceFile.Writer.valueClass
804                                    (LongWritable.class));
805    }
806    try {
807      long pos = 0L;
808      LongWritable position = new LongWritable();
809      while(dataReader.next(key, value)) {
810        cnt++;
811        if (cnt % indexInterval == 0) {
812          position.set(pos);
813          if (!dryrun) indexWriter.append(key, position);
814        }
815        pos = dataReader.getPosition();
816      }
817    } catch(Throwable t) {
818      // truncated data file. swallow it.
819    }
820    dataReader.close();
821    if (!dryrun) indexWriter.close();
822    return cnt;
823  }
824
825
826  public static void main(String[] args) throws Exception {
827    String usage = "Usage: MapFile inFile outFile";
828      
829    if (args.length != 2) {
830      System.err.println(usage);
831      System.exit(-1);
832    }
833      
834    String in = args[0];
835    String out = args[1];
836
837    Configuration conf = new Configuration();
838    FileSystem fs = FileSystem.getLocal(conf);
839    MapFile.Reader reader = new MapFile.Reader(fs, in, conf);
840    MapFile.Writer writer =
841      new MapFile.Writer(conf, fs, out,
842          reader.getKeyClass().asSubclass(WritableComparable.class),
843          reader.getValueClass());
844
845    WritableComparable key =
846      ReflectionUtils.newInstance(reader.getKeyClass().asSubclass(WritableComparable.class), conf);
847    Writable value =
848      ReflectionUtils.newInstance(reader.getValueClass().asSubclass(Writable.class), conf);
849
850    while (reader.next(key, value))               // copy all entries
851      writer.append(key, value);
852
853    writer.close();
854  }
855
856}