001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    
019    package org.apache.hadoop.io;
020    
021    import java.io.EOFException;
022    import java.io.IOException;
023    import java.util.ArrayList;
024    import java.util.Arrays;
025    
026    import org.apache.commons.logging.Log;
027    import org.apache.commons.logging.LogFactory;
028    import org.apache.hadoop.classification.InterfaceAudience;
029    import org.apache.hadoop.classification.InterfaceStability;
030    import org.apache.hadoop.conf.Configuration;
031    import org.apache.hadoop.fs.FileSystem;
032    import org.apache.hadoop.fs.Path;
033    import org.apache.hadoop.io.SequenceFile.CompressionType;
034    import org.apache.hadoop.io.compress.CompressionCodec;
035    import org.apache.hadoop.util.Options;
036    import org.apache.hadoop.util.Progressable;
037    import org.apache.hadoop.util.ReflectionUtils;
038    
039    /** A file-based map from keys to values.
040     * 
041     * <p>A map is a directory containing two files, the <code>data</code> file,
042     * containing all keys and values in the map, and a smaller <code>index</code>
043     * file, containing a fraction of the keys.  The fraction is determined by
044     * {@link Writer#getIndexInterval()}.
045     *
046     * <p>The index file is read entirely into memory.  Thus key implementations
047     * should try to keep themselves small.
048     *
049     * <p>Map files are created by adding entries in-order.  To maintain a large
050     * database, perform updates by copying the previous version of a database and
051     * merging in a sorted change list, to create a new version of the database in
052     * a new file.  Sorting large change lists can be done with {@link
053     * SequenceFile.Sorter}.
054     */
055    @InterfaceAudience.Public
056    @InterfaceStability.Stable
057    public class MapFile {
058      private static final Log LOG = LogFactory.getLog(MapFile.class);
059    
060      /** The name of the index file. */
061      public static final String INDEX_FILE_NAME = "index";
062    
063      /** The name of the data file. */
064      public static final String DATA_FILE_NAME = "data";
065    
066      protected MapFile() {}                          // no public ctor
067    
068      /** Writes a new map. */
069      public static class Writer implements java.io.Closeable {
070        private SequenceFile.Writer data;
071        private SequenceFile.Writer index;
072    
073        final private static String INDEX_INTERVAL = "io.map.index.interval";
074        private int indexInterval = 128;
075    
076        private long size;
077        private LongWritable position = new LongWritable();
078    
079        // the following fields are used only for checking key order
080        private WritableComparator comparator;
081        private DataInputBuffer inBuf = new DataInputBuffer();
082        private DataOutputBuffer outBuf = new DataOutputBuffer();
083        private WritableComparable lastKey;
084    
085        /** What's the position (in bytes) we wrote when we got the last index */
086        private long lastIndexPos = -1;
087    
088        /**
089         * What was size when we last wrote an index. Set to MIN_VALUE to ensure that
090         * we have an index at position zero -- midKey will throw an exception if this
091         * is not the case
092         */
093        private long lastIndexKeyCount = Long.MIN_VALUE;
094    
095    
096        /** Create the named map for keys of the named class. 
097         * @deprecated Use Writer(Configuration, Path, Option...) instead.
098         */
099        @Deprecated
100        public Writer(Configuration conf, FileSystem fs, String dirName,
101                      Class<? extends WritableComparable> keyClass, 
102                      Class valClass) throws IOException {
103          this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass));
104        }
105    
106        /** Create the named map for keys of the named class. 
107         * @deprecated Use Writer(Configuration, Path, Option...) instead.
108         */
109        @Deprecated
110        public Writer(Configuration conf, FileSystem fs, String dirName,
111                      Class<? extends WritableComparable> keyClass, Class valClass,
112                      CompressionType compress, 
113                      Progressable progress) throws IOException {
114          this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass),
115               compression(compress), progressable(progress));
116        }
117    
118        /** Create the named map for keys of the named class. 
119         * @deprecated Use Writer(Configuration, Path, Option...) instead.
120         */
121        @Deprecated
122        public Writer(Configuration conf, FileSystem fs, String dirName,
123                      Class<? extends WritableComparable> keyClass, Class valClass,
124                      CompressionType compress, CompressionCodec codec,
125                      Progressable progress) throws IOException {
126          this(conf, new Path(dirName), keyClass(keyClass), valueClass(valClass),
127               compression(compress, codec), progressable(progress));
128        }
129    
130        /** Create the named map for keys of the named class. 
131         * @deprecated Use Writer(Configuration, Path, Option...) instead.
132         */
133        @Deprecated
134        public Writer(Configuration conf, FileSystem fs, String dirName,
135                      Class<? extends WritableComparable> keyClass, Class valClass,
136                      CompressionType compress) throws IOException {
137          this(conf, new Path(dirName), keyClass(keyClass),
138               valueClass(valClass), compression(compress));
139        }
140    
141        /** Create the named map using the named key comparator. 
142         * @deprecated Use Writer(Configuration, Path, Option...) instead.
143         */
144        @Deprecated
145        public Writer(Configuration conf, FileSystem fs, String dirName,
146                      WritableComparator comparator, Class valClass
147                      ) throws IOException {
148          this(conf, new Path(dirName), comparator(comparator), 
149               valueClass(valClass));
150        }
151    
152        /** Create the named map using the named key comparator. 
153         * @deprecated Use Writer(Configuration, Path, Option...) instead.
154         */
155        @Deprecated
156        public Writer(Configuration conf, FileSystem fs, String dirName,
157                      WritableComparator comparator, Class valClass,
158                      SequenceFile.CompressionType compress) throws IOException {
159          this(conf, new Path(dirName), comparator(comparator),
160               valueClass(valClass), compression(compress));
161        }
162    
163        /** Create the named map using the named key comparator. 
164         * @deprecated Use Writer(Configuration, Path, Option...)} instead.
165         */
166        @Deprecated
167        public Writer(Configuration conf, FileSystem fs, String dirName,
168                      WritableComparator comparator, Class valClass,
169                      SequenceFile.CompressionType compress,
170                      Progressable progress) throws IOException {
171          this(conf, new Path(dirName), comparator(comparator),
172               valueClass(valClass), compression(compress),
173               progressable(progress));
174        }
175    
176        /** Create the named map using the named key comparator. 
177         * @deprecated Use Writer(Configuration, Path, Option...) instead.
178         */
179        @Deprecated
180        public Writer(Configuration conf, FileSystem fs, String dirName,
181                      WritableComparator comparator, Class valClass,
182                      SequenceFile.CompressionType compress, CompressionCodec codec,
183                      Progressable progress) throws IOException {
184          this(conf, new Path(dirName), comparator(comparator),
185               valueClass(valClass), compression(compress, codec),
186               progressable(progress));
187        }
188        
189        // our options are a superset of sequence file writer options
190        public static interface Option extends SequenceFile.Writer.Option { }
191        
192        private static class KeyClassOption extends Options.ClassOption
193                                            implements Option {
194          KeyClassOption(Class<?> value) {
195            super(value);
196          }
197        }
198        
199        private static class ComparatorOption implements Option {
200          private final WritableComparator value;
201          ComparatorOption(WritableComparator value) {
202            this.value = value;
203          }
204          WritableComparator getValue() {
205            return value;
206          }
207        }
208    
209        public static Option keyClass(Class<? extends WritableComparable> value) {
210          return new KeyClassOption(value);
211        }
212        
213        public static Option comparator(WritableComparator value) {
214          return new ComparatorOption(value);
215        }
216    
217        public static SequenceFile.Writer.Option valueClass(Class<?> value) {
218          return SequenceFile.Writer.valueClass(value);
219        }
220        
221        public static 
222        SequenceFile.Writer.Option compression(CompressionType type) {
223          return SequenceFile.Writer.compression(type);
224        }
225    
226        public static 
227        SequenceFile.Writer.Option compression(CompressionType type,
228            CompressionCodec codec) {
229          return SequenceFile.Writer.compression(type, codec);
230        }
231    
232        public static SequenceFile.Writer.Option progressable(Progressable value) {
233          return SequenceFile.Writer.progressable(value);
234        }
235    
236        @SuppressWarnings("unchecked")
237        public Writer(Configuration conf, 
238                      Path dirName,
239                      SequenceFile.Writer.Option... opts
240                      ) throws IOException {
241          KeyClassOption keyClassOption = 
242            Options.getOption(KeyClassOption.class, opts);
243          ComparatorOption comparatorOption =
244            Options.getOption(ComparatorOption.class, opts);
245          if ((keyClassOption == null) == (comparatorOption == null)) {
246            throw new IllegalArgumentException("key class or comparator option "
247                                               + "must be set");
248          }
249          this.indexInterval = conf.getInt(INDEX_INTERVAL, this.indexInterval);
250    
251          Class<? extends WritableComparable> keyClass;
252          if (keyClassOption == null) {
253            this.comparator = comparatorOption.getValue();
254            keyClass = comparator.getKeyClass();
255          } else {
256            keyClass= 
257              (Class<? extends WritableComparable>) keyClassOption.getValue();
258            this.comparator = WritableComparator.get(keyClass);
259          }
260          this.lastKey = comparator.newKey();
261          FileSystem fs = dirName.getFileSystem(conf);
262    
263          if (!fs.mkdirs(dirName)) {
264            throw new IOException("Mkdirs failed to create directory " + dirName);
265          }
266          Path dataFile = new Path(dirName, DATA_FILE_NAME);
267          Path indexFile = new Path(dirName, INDEX_FILE_NAME);
268    
269          SequenceFile.Writer.Option[] dataOptions =
270            Options.prependOptions(opts, 
271                                   SequenceFile.Writer.file(dataFile),
272                                   SequenceFile.Writer.keyClass(keyClass));
273          this.data = SequenceFile.createWriter(conf, dataOptions);
274    
275          SequenceFile.Writer.Option[] indexOptions =
276            Options.prependOptions(opts, SequenceFile.Writer.file(indexFile),
277                SequenceFile.Writer.keyClass(keyClass),
278                SequenceFile.Writer.valueClass(LongWritable.class),
279                SequenceFile.Writer.compression(CompressionType.BLOCK));
280          this.index = SequenceFile.createWriter(conf, indexOptions);      
281        }
282    
283        /** The number of entries that are added before an index entry is added.*/
284        public int getIndexInterval() { return indexInterval; }
285    
286        /** Sets the index interval.
287         * @see #getIndexInterval()
288         */
289        public void setIndexInterval(int interval) { indexInterval = interval; }
290    
291        /** Sets the index interval and stores it in conf
292         * @see #getIndexInterval()
293         */
294        public static void setIndexInterval(Configuration conf, int interval) {
295          conf.setInt(INDEX_INTERVAL, interval);
296        }
297    
298        /** Close the map. */
299        @Override
300        public synchronized void close() throws IOException {
301          data.close();
302          index.close();
303        }
304    
305        /** Append a key/value pair to the map.  The key must be greater or equal
306         * to the previous key added to the map. */
307        public synchronized void append(WritableComparable key, Writable val)
308          throws IOException {
309    
310          checkKey(key);
311    
312          long pos = data.getLength();      
313          // Only write an index if we've changed positions. In a block compressed
314          // file, this means we write an entry at the start of each block      
315          if (size >= lastIndexKeyCount + indexInterval && pos > lastIndexPos) {
316            position.set(pos);                        // point to current eof
317            index.append(key, position);
318            lastIndexPos = pos;
319            lastIndexKeyCount = size;
320          }
321    
322          data.append(key, val);                      // append key/value to data
323          size++;
324        }
325    
326        private void checkKey(WritableComparable key) throws IOException {
327          // check that keys are well-ordered
328          if (size != 0 && comparator.compare(lastKey, key) > 0)
329            throw new IOException("key out of order: "+key+" after "+lastKey);
330              
331          // update lastKey with a copy of key by writing and reading
332          outBuf.reset();
333          key.write(outBuf);                          // write new key
334    
335          inBuf.reset(outBuf.getData(), outBuf.getLength());
336          lastKey.readFields(inBuf);                  // read into lastKey
337        }
338    
339      }
340      
341      /** Provide access to an existing map. */
342      public static class Reader implements java.io.Closeable {
343          
344        /** Number of index entries to skip between each entry.  Zero by default.
345         * Setting this to values larger than zero can facilitate opening large map
346         * files using less memory. */
347        private int INDEX_SKIP = 0;
348          
349        private WritableComparator comparator;
350    
351        private WritableComparable nextKey;
352        private long seekPosition = -1;
353        private int seekIndex = -1;
354        private long firstPosition;
355    
356        // the data, on disk
357        private SequenceFile.Reader data;
358        private SequenceFile.Reader index;
359    
360        // whether the index Reader was closed
361        private boolean indexClosed = false;
362    
363        // the index, in memory
364        private int count = -1;
365        private WritableComparable[] keys;
366        private long[] positions;
367    
368        /** Returns the class of keys in this file. */
369        public Class<?> getKeyClass() { return data.getKeyClass(); }
370    
371        /** Returns the class of values in this file. */
372        public Class<?> getValueClass() { return data.getValueClass(); }
373    
374        public static interface Option extends SequenceFile.Reader.Option {}
375        
376        public static Option comparator(WritableComparator value) {
377          return new ComparatorOption(value);
378        }
379    
380        static class ComparatorOption implements Option {
381          private final WritableComparator value;
382          ComparatorOption(WritableComparator value) {
383            this.value = value;
384          }
385          WritableComparator getValue() {
386            return value;
387          }
388        }
389    
390        public Reader(Path dir, Configuration conf,
391                      SequenceFile.Reader.Option... opts) throws IOException {
392          ComparatorOption comparatorOption = 
393            Options.getOption(ComparatorOption.class, opts);
394          WritableComparator comparator =
395            comparatorOption == null ? null : comparatorOption.getValue();
396          INDEX_SKIP = conf.getInt("io.map.index.skip", 0);
397          open(dir, comparator, conf, opts);
398        }
399     
400        /** Construct a map reader for the named map.
401         * @deprecated
402         */
403        @Deprecated
404        public Reader(FileSystem fs, String dirName, 
405                      Configuration conf) throws IOException {
406          this(new Path(dirName), conf);
407        }
408    
409        /** Construct a map reader for the named map using the named comparator.
410         * @deprecated
411         */
412        @Deprecated
413        public Reader(FileSystem fs, String dirName, WritableComparator comparator, 
414                      Configuration conf) throws IOException {
415          this(new Path(dirName), conf, comparator(comparator));
416        }
417        
418        protected synchronized void open(Path dir,
419                                         WritableComparator comparator,
420                                         Configuration conf, 
421                                         SequenceFile.Reader.Option... options
422                                         ) throws IOException {
423          Path dataFile = new Path(dir, DATA_FILE_NAME);
424          Path indexFile = new Path(dir, INDEX_FILE_NAME);
425    
426          // open the data
427          this.data = createDataFileReader(dataFile, conf, options);
428          this.firstPosition = data.getPosition();
429    
430          if (comparator == null)
431            this.comparator = 
432              WritableComparator.get(data.getKeyClass().
433                                       asSubclass(WritableComparable.class));
434          else
435            this.comparator = comparator;
436    
437          // open the index
438          SequenceFile.Reader.Option[] indexOptions =
439            Options.prependOptions(options, SequenceFile.Reader.file(indexFile));
440          this.index = new SequenceFile.Reader(conf, indexOptions);
441        }
442    
443        /**
444         * Override this method to specialize the type of
445         * {@link SequenceFile.Reader} returned.
446         */
447        protected SequenceFile.Reader 
448          createDataFileReader(Path dataFile, Configuration conf,
449                               SequenceFile.Reader.Option... options
450                               ) throws IOException {
451          SequenceFile.Reader.Option[] newOptions =
452            Options.prependOptions(options, SequenceFile.Reader.file(dataFile));
453          return new SequenceFile.Reader(conf, newOptions);
454        }
455    
456        private void readIndex() throws IOException {
457          // read the index entirely into memory
458          if (this.keys != null)
459            return;
460          this.count = 0;
461          this.positions = new long[1024];
462    
463          try {
464            int skip = INDEX_SKIP;
465            LongWritable position = new LongWritable();
466            WritableComparable lastKey = null;
467            long lastIndex = -1;
468            ArrayList<WritableComparable> keyBuilder = new ArrayList<WritableComparable>(1024);
469            while (true) {
470              WritableComparable k = comparator.newKey();
471    
472              if (!index.next(k, position))
473                break;
474    
475              // check order to make sure comparator is compatible
476              if (lastKey != null && comparator.compare(lastKey, k) > 0)
477                throw new IOException("key out of order: "+k+" after "+lastKey);
478              lastKey = k;
479              if (skip > 0) {
480                skip--;
481                continue;                             // skip this entry
482              } else {
483                skip = INDEX_SKIP;                    // reset skip
484              }
485    
486              // don't read an index that is the same as the previous one. Block
487              // compressed map files used to do this (multiple entries would point
488              // at the same block)
489              if (position.get() == lastIndex)
490                continue;
491    
492              if (count == positions.length) {
493                positions = Arrays.copyOf(positions, positions.length * 2);
494              }
495    
496              keyBuilder.add(k);
497              positions[count] = position.get();
498              count++;
499            }
500    
501            this.keys = keyBuilder.toArray(new WritableComparable[count]);
502            positions = Arrays.copyOf(positions, count);
503          } catch (EOFException e) {
504            LOG.warn("Unexpected EOF reading " + index +
505                                  " at entry #" + count + ".  Ignoring.");
506          } finally {
507            indexClosed = true;
508            index.close();
509          }
510        }
511    
512        /** Re-positions the reader before its first key. */
513        public synchronized void reset() throws IOException {
514          data.seek(firstPosition);
515        }
516    
517        /** Get the key at approximately the middle of the file. Or null if the
518         *  file is empty. 
519         */
520        public synchronized WritableComparable midKey() throws IOException {
521    
522          readIndex();
523          if (count == 0) {
524            return null;
525          }
526        
527          return keys[(count - 1) / 2];
528        }
529        
530        /** Reads the final key from the file.
531         *
532         * @param key key to read into
533         */
534        public synchronized void finalKey(WritableComparable key)
535          throws IOException {
536    
537          long originalPosition = data.getPosition(); // save position
538          try {
539            readIndex();                              // make sure index is valid
540            if (count > 0) {
541              data.seek(positions[count-1]);          // skip to last indexed entry
542            } else {
543              reset();                                // start at the beginning
544            }
545            while (data.next(key)) {}                 // scan to eof
546    
547          } finally {
548            data.seek(originalPosition);              // restore position
549          }
550        }
551    
552        /** Positions the reader at the named key, or if none such exists, at the
553         * first entry after the named key.  Returns true iff the named key exists
554         * in this map.
555         */
556        public synchronized boolean seek(WritableComparable key) throws IOException {
557          return seekInternal(key) == 0;
558        }
559    
560        /** 
561         * Positions the reader at the named key, or if none such exists, at the
562         * first entry after the named key.
563         *
564         * @return  0   - exact match found
565         *          < 0 - positioned at next record
566         *          1   - no more records in file
567         */
568        private synchronized int seekInternal(WritableComparable key)
569          throws IOException {
570          return seekInternal(key, false);
571        }
572    
573        /** 
574         * Positions the reader at the named key, or if none such exists, at the
575         * key that falls just before or just after dependent on how the
576         * <code>before</code> parameter is set.
577         * 
578         * @param before - IF true, and <code>key</code> does not exist, position
579         * file at entry that falls just before <code>key</code>.  Otherwise,
580         * position file at record that sorts just after.
581         * @return  0   - exact match found
582         *          < 0 - positioned at next record
583         *          1   - no more records in file
584         */
585        private synchronized int seekInternal(WritableComparable key,
586            final boolean before)
587          throws IOException {
588          readIndex();                                // make sure index is read
589    
590          if (seekIndex != -1                         // seeked before
591              && seekIndex+1 < count           
592              && comparator.compare(key, keys[seekIndex+1])<0 // before next indexed
593              && comparator.compare(key, nextKey)
594              >= 0) {                                 // but after last seeked
595            // do nothing
596          } else {
597            seekIndex = binarySearch(key);
598            if (seekIndex < 0)                        // decode insertion point
599              seekIndex = -seekIndex-2;
600    
601            if (seekIndex == -1)                      // belongs before first entry
602              seekPosition = firstPosition;           // use beginning of file
603            else
604              seekPosition = positions[seekIndex];    // else use index
605          }
606          data.seek(seekPosition);
607          
608          if (nextKey == null)
609            nextKey = comparator.newKey();
610         
611          // If we're looking for the key before, we need to keep track
612          // of the position we got the current key as well as the position
613          // of the key before it.
614          long prevPosition = -1;
615          long curPosition = seekPosition;
616    
617          while (data.next(nextKey)) {
618            int c = comparator.compare(key, nextKey);
619            if (c <= 0) {                             // at or beyond desired
620              if (before && c != 0) {
621                if (prevPosition == -1) {
622                  // We're on the first record of this index block
623                  // and we've already passed the search key. Therefore
624                  // we must be at the beginning of the file, so seek
625                  // to the beginning of this block and return c
626                  data.seek(curPosition);
627                } else {
628                  // We have a previous record to back up to
629                  data.seek(prevPosition);
630                  data.next(nextKey);
631                  // now that we've rewound, the search key must be greater than this key
632                  return 1;
633                }
634              }
635              return c;
636            }
637            if (before) {
638              prevPosition = curPosition;
639              curPosition = data.getPosition();
640            }
641          }
642    
643          return 1;
644        }
645    
646        private int binarySearch(WritableComparable key) {
647          int low = 0;
648          int high = count-1;
649    
650          while (low <= high) {
651            int mid = (low + high) >>> 1;
652            WritableComparable midVal = keys[mid];
653            int cmp = comparator.compare(midVal, key);
654    
655            if (cmp < 0)
656              low = mid + 1;
657            else if (cmp > 0)
658              high = mid - 1;
659            else
660              return mid;                             // key found
661          }
662          return -(low + 1);                          // key not found.
663        }
664    
665        /** Read the next key/value pair in the map into <code>key</code> and
666         * <code>val</code>.  Returns true if such a pair exists and false when at
667         * the end of the map */
668        public synchronized boolean next(WritableComparable key, Writable val)
669          throws IOException {
670          return data.next(key, val);
671        }
672    
673        /** Return the value for the named key, or null if none exists. */
674        public synchronized Writable get(WritableComparable key, Writable val)
675          throws IOException {
676          if (seek(key)) {
677            data.getCurrentValue(val);
678            return val;
679          } else
680            return null;
681        }
682    
683        /** 
684         * Finds the record that is the closest match to the specified key.
685         * Returns <code>key</code> or if it does not exist, at the first entry
686         * after the named key.
687         * 
688    -     * @param key       - key that we're trying to find
689    -     * @param val       - data value if key is found
690    -     * @return          - the key that was the closest match or null if eof.
691         */
692        public synchronized WritableComparable getClosest(WritableComparable key,
693          Writable val)
694        throws IOException {
695          return getClosest(key, val, false);
696        }
697    
698        /** 
699         * Finds the record that is the closest match to the specified key.
700         * 
701         * @param key       - key that we're trying to find
702         * @param val       - data value if key is found
703         * @param before    - IF true, and <code>key</code> does not exist, return
704         * the first entry that falls just before the <code>key</code>.  Otherwise,
705         * return the record that sorts just after.
706         * @return          - the key that was the closest match or null if eof.
707         */
708        public synchronized WritableComparable getClosest(WritableComparable key,
709            Writable val, final boolean before)
710          throws IOException {
711         
712          int c = seekInternal(key, before);
713    
714          // If we didn't get an exact match, and we ended up in the wrong
715          // direction relative to the query key, return null since we
716          // must be at the beginning or end of the file.
717          if ((!before && c > 0) ||
718              (before && c < 0)) {
719            return null;
720          }
721    
722          data.getCurrentValue(val);
723          return nextKey;
724        }
725    
726        /** Close the map. */
727        @Override
728        public synchronized void close() throws IOException {
729          if (!indexClosed) {
730            index.close();
731          }
732          data.close();
733        }
734    
735      }
736    
737      /** Renames an existing map directory. */
738      public static void rename(FileSystem fs, String oldName, String newName)
739        throws IOException {
740        Path oldDir = new Path(oldName);
741        Path newDir = new Path(newName);
742        if (!fs.rename(oldDir, newDir)) {
743          throw new IOException("Could not rename " + oldDir + " to " + newDir);
744        }
745      }
746    
747      /** Deletes the named map file. */
748      public static void delete(FileSystem fs, String name) throws IOException {
749        Path dir = new Path(name);
750        Path data = new Path(dir, DATA_FILE_NAME);
751        Path index = new Path(dir, INDEX_FILE_NAME);
752    
753        fs.delete(data, true);
754        fs.delete(index, true);
755        fs.delete(dir, true);
756      }
757    
758      /**
759       * This method attempts to fix a corrupt MapFile by re-creating its index.
760       * @param fs filesystem
761       * @param dir directory containing the MapFile data and index
762       * @param keyClass key class (has to be a subclass of Writable)
763       * @param valueClass value class (has to be a subclass of Writable)
764       * @param dryrun do not perform any changes, just report what needs to be done
765       * @return number of valid entries in this MapFile, or -1 if no fixing was needed
766       * @throws Exception
767       */
768      public static long fix(FileSystem fs, Path dir,
769                             Class<? extends Writable> keyClass,
770                             Class<? extends Writable> valueClass, boolean dryrun,
771                             Configuration conf) throws Exception {
772        String dr = (dryrun ? "[DRY RUN ] " : "");
773        Path data = new Path(dir, DATA_FILE_NAME);
774        Path index = new Path(dir, INDEX_FILE_NAME);
775        int indexInterval = conf.getInt(Writer.INDEX_INTERVAL, 128);
776        if (!fs.exists(data)) {
777          // there's nothing we can do to fix this!
778          throw new Exception(dr + "Missing data file in " + dir + ", impossible to fix this.");
779        }
780        if (fs.exists(index)) {
781          // no fixing needed
782          return -1;
783        }
784        SequenceFile.Reader dataReader = 
785          new SequenceFile.Reader(conf, SequenceFile.Reader.file(data));
786        if (!dataReader.getKeyClass().equals(keyClass)) {
787          throw new Exception(dr + "Wrong key class in " + dir + ", expected" + keyClass.getName() +
788                              ", got " + dataReader.getKeyClass().getName());
789        }
790        if (!dataReader.getValueClass().equals(valueClass)) {
791          throw new Exception(dr + "Wrong value class in " + dir + ", expected" + valueClass.getName() +
792                              ", got " + dataReader.getValueClass().getName());
793        }
794        long cnt = 0L;
795        Writable key = ReflectionUtils.newInstance(keyClass, conf);
796        Writable value = ReflectionUtils.newInstance(valueClass, conf);
797        SequenceFile.Writer indexWriter = null;
798        if (!dryrun) {
799          indexWriter = 
800            SequenceFile.createWriter(conf, 
801                                      SequenceFile.Writer.file(index), 
802                                      SequenceFile.Writer.keyClass(keyClass), 
803                                      SequenceFile.Writer.valueClass
804                                        (LongWritable.class));
805        }
806        try {
807          long pos = 0L;
808          LongWritable position = new LongWritable();
809          while(dataReader.next(key, value)) {
810            cnt++;
811            if (cnt % indexInterval == 0) {
812              position.set(pos);
813              if (!dryrun) indexWriter.append(key, position);
814            }
815            pos = dataReader.getPosition();
816          }
817        } catch(Throwable t) {
818          // truncated data file. swallow it.
819        }
820        dataReader.close();
821        if (!dryrun) indexWriter.close();
822        return cnt;
823      }
824    
825    
826      public static void main(String[] args) throws Exception {
827        String usage = "Usage: MapFile inFile outFile";
828          
829        if (args.length != 2) {
830          System.err.println(usage);
831          System.exit(-1);
832        }
833          
834        String in = args[0];
835        String out = args[1];
836    
837        Configuration conf = new Configuration();
838        FileSystem fs = FileSystem.getLocal(conf);
839        MapFile.Reader reader = new MapFile.Reader(fs, in, conf);
840        MapFile.Writer writer =
841          new MapFile.Writer(conf, fs, out,
842              reader.getKeyClass().asSubclass(WritableComparable.class),
843              reader.getValueClass());
844    
845        WritableComparable key =
846          ReflectionUtils.newInstance(reader.getKeyClass().asSubclass(WritableComparable.class), conf);
847        Writable value =
848          ReflectionUtils.newInstance(reader.getValueClass().asSubclass(Writable.class), conf);
849    
850        while (reader.next(key, value))               // copy all entries
851          writer.append(key, value);
852    
853        writer.close();
854      }
855    
856    }