001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.fs;
019
020import org.apache.commons.logging.Log;
021import org.apache.commons.logging.LogFactory;
022import org.apache.hadoop.conf.Configuration;
023import org.apache.hadoop.fs.permission.FsPermission;
024import org.apache.hadoop.io.IOUtils;
025import org.apache.hadoop.io.Text;
026import org.apache.hadoop.util.LineReader;
027import org.apache.hadoop.util.Progressable;
028
029import java.io.FileNotFoundException;
030import java.io.IOException;
031import java.io.UnsupportedEncodingException;
032import java.net.URI;
033import java.net.URISyntaxException;
034import java.net.URLDecoder;
035import java.util.*;
036
/**
 * This is an implementation of the Hadoop Archive
 * Filesystem. This archive Filesystem has index files
 * of the form _index* and data files of the form
 * part-*. The index files store the metadata of the
 * archived files. There are two index files: _masterindex
 * and _index. The master index is a level of indirection
 * into the index file that makes lookups faster: the index
 * file is sorted by the hash codes of the paths it contains,
 * and the master index holds pointers to the positions in
 * the index file for ranges of hash codes.
 */
049
050public class HarFileSystem extends FileSystem {
051
052  private static final Log LOG = LogFactory.getLog(HarFileSystem.class);
053
054  public static final String METADATA_CACHE_ENTRIES_KEY = "fs.har.metadatacache.entries";
055  public static final int METADATA_CACHE_ENTRIES_DEFAULT = 10;
056
057  public static final int VERSION = 3;
058
059  private static Map<URI, HarMetaData> harMetaCache;
060
061  // uri representation of this Har filesystem
062  private URI uri;
063  // the top level path of the archive
064  // in the underlying file system
065  private Path archivePath;
066  // the har auth
067  private String harAuth;
068
069  // pointer into the static metadata cache
070  private HarMetaData metadata;
071
072  private FileSystem fs;
073
  /**
   * Public no-argument constructor for HarFileSystem.
   */
077  public HarFileSystem() {
078    // Must call #initialize() method to set the underlying file system
079  }
080
  /**
   * Return the protocol scheme for the FileSystem.
   *
   * @return <code>har</code>
   */
087  @Override
088  public String getScheme() {
089    return "har";
090  }
091
092  /**
093   * Constructor to create a HarFileSystem with an
094   * underlying filesystem.
095   * @param fs underlying file system
096   */
097  public HarFileSystem(FileSystem fs) {
098    this.fs = fs;
099    this.statistics = fs.statistics;
100  }
101 
102  private synchronized void initializeMetadataCache(Configuration conf) {
103    if (harMetaCache == null) {
104      int cacheSize = conf.getInt(METADATA_CACHE_ENTRIES_KEY, METADATA_CACHE_ENTRIES_DEFAULT);
105      harMetaCache = Collections.synchronizedMap(new LruCache<URI, HarMetaData>(cacheSize));
106    }
107  }
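  // For illustration, a deployment that wants to keep the metadata of up to
  // 100 archives cached (the default is 10) could set, for example:
  //   conf.setInt(HarFileSystem.METADATA_CACHE_ENTRIES_KEY, 100);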
108 
  /**
   * Initialize a Har filesystem per har archive. The
   * archive home directory is the top level directory
   * in the filesystem that contains the HAR archive.
   * Be careful with this method: you do not want to keep
   * creating new FileSystem instances on every call to
   * path.getFileSystem().
   * The URI of a Har filesystem is either
   * har://underlyingfsscheme-host:port/archivepath
   * or
   * har:///archivepath, in which case the default filesystem
   * from the configuration is used as the underlying filesystem.
   * (An illustrative archive layout and URI are sketched in the
   * comment following this method.)
   */
122  @Override
123  public void initialize(URI name, Configuration conf) throws IOException {
124    // initialize the metadata cache, if needed
125    initializeMetadataCache(conf);
126
127    // decode the name
128    URI underLyingURI = decodeHarURI(name, conf);
    // we got the right har path; now check whether this is
    // truly a har filesystem
131    Path harPath = archivePath(
132      new Path(name.getScheme(), name.getAuthority(), name.getPath()));
133    if (harPath == null) { 
134      throw new IOException("Invalid path for the Har Filesystem. " + 
135                           name.toString());
136    }
137    if (fs == null) {
138      fs = FileSystem.get(underLyingURI, conf);
139    }
140    uri = harPath.toUri();
141    archivePath = new Path(uri.getPath());
142    harAuth = getHarAuth(underLyingURI);
143    //check for the underlying fs containing
144    // the index file
145    Path masterIndexPath = new Path(archivePath, "_masterindex");
146    Path archiveIndexPath = new Path(archivePath, "_index");
147    if (!fs.exists(masterIndexPath) || !fs.exists(archiveIndexPath)) {
148      throw new IOException("Invalid path for the Har Filesystem. " +
149          "No index file in " + harPath);
150    }
151
152    metadata = harMetaCache.get(uri);
153    if (metadata != null) {
154      FileStatus mStat = fs.getFileStatus(masterIndexPath);
155      FileStatus aStat = fs.getFileStatus(archiveIndexPath);
156      if (mStat.getModificationTime() != metadata.getMasterIndexTimestamp() ||
157          aStat.getModificationTime() != metadata.getArchiveIndexTimestamp()) {
158        // the archive has been overwritten since we last read it
159        // remove the entry from the meta data cache
160        metadata = null;
161        harMetaCache.remove(uri);
162      }
163    }
164    if (metadata == null) {
165      metadata = new HarMetaData(fs, masterIndexPath, archiveIndexPath);
166      metadata.parseMetaData();
167      harMetaCache.put(uri, metadata);
168    }
169  }
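  // Illustration (hypothetical paths): an archive created at
  //   /user/alice/data.har
  // holds, in the underlying filesystem,
  //   /user/alice/data.har/_masterindex
  //   /user/alice/data.har/_index
  //   /user/alice/data.har/part-0 ...
  // and a file archived as /dir/file.txt is then addressed either as
  //   har://hdfs-namenode:8020/user/alice/data.har/dir/file.txt
  // or, relying on the default filesystem from the configuration, as
  //   har:///user/alice/data.har/dir/file.txt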
170
171  @Override
172  public Configuration getConf() {
173    return fs.getConf();
174  }
175
176  // get the version of the filesystem from the masterindex file
  // the version is currently not useful since it's the first version
178  // of archives
179  public int getHarVersion() throws IOException {
180    if (metadata != null) {
181      return metadata.getVersion();
182    }
183    else {
184      throw new IOException("Invalid meta data for the Har Filesystem");
185    }
186  }
187
  /*
   * Find the ancestor of p (possibly p itself) that is the
   * archive path, i.e. the deepest prefix whose last path
   * segment ends with .har; e.g. for /user/foo.har/dir/file
   * this returns /user/foo.har. Returns null if there is none.
   */
194  private Path archivePath(Path p) {
195    Path retPath = null;
196    Path tmp = p;
197    for (int i=0; i< p.depth(); i++) {
198      if (tmp.toString().endsWith(".har")) {
199        retPath = tmp;
200        break;
201      }
202      tmp = tmp.getParent();
203    }
204    return retPath;
205  }
206
  /**
   * Decode the raw har URI into the URI of the underlying filesystem.
   * @param rawURI raw Har URI
   * @param conf configuration used to resolve the default filesystem
   * @return URI of the underlying fileSystem
   * @throws IOException if the har URI is malformed
   */
212  private URI decodeHarURI(URI rawURI, Configuration conf) throws IOException {
    String tmpAuth = rawURI.getAuthority();
    // no authority given: fall back to the default
    // filesystem from the config and return its URI
    if (tmpAuth == null) {
      return FileSystem.getDefaultUri(conf);
    }
222    String authority = rawURI.getAuthority();
223    if (authority == null) {
224      throw new IOException("URI: " + rawURI
225          + " is an invalid Har URI since authority==null."
226          + "  Expecting har://<scheme>-<host>/<path>.");
227    }
228 
229    int i = authority.indexOf('-');
230    if (i < 0) {
231      throw new IOException("URI: " + rawURI
232          + " is an invalid Har URI since '-' not found."
233          + "  Expecting har://<scheme>-<host>/<path>.");
234    }
235 
236    if (rawURI.getQuery() != null) {
237      // query component not allowed
238      throw new IOException("query component in Path not supported  " + rawURI);
239    }
240 
241    URI tmp;
242    try {
243      // convert <scheme>-<host> to <scheme>://<host>
244      URI baseUri = new URI(authority.replaceFirst("-", "://"));
245 
246      tmp = new URI(baseUri.getScheme(), baseUri.getAuthority(),
247            rawURI.getPath(), rawURI.getQuery(), rawURI.getFragment());
248    } catch (URISyntaxException e) {
249      throw new IOException("URI: " + rawURI
250          + " is an invalid Har URI. Expecting har://<scheme>-<host>/<path>.");
251    }
252    return tmp;
253  }
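  // Worked example (hypothetical host name): for the raw URI
  //   har://hdfs-namenode:8020/user/alice/data.har/dir/file.txt
  // the authority "hdfs-namenode:8020" is rewritten to "hdfs://namenode:8020",
  // so decodeHarURI returns
  //   hdfs://namenode:8020/user/alice/data.har/dir/file.txt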
254
255  private static String decodeString(String str)
256    throws UnsupportedEncodingException {
257    return URLDecoder.decode(str, "UTF-8");
258  }
259
260  private String decodeFileName(String fname)
261    throws UnsupportedEncodingException {
262    int version = metadata.getVersion();
263    if (version == 2 || version == 3){
264      return decodeString(fname);
265    }
266    return fname;
267  }
268
269  /**
270   * return the top level archive.
271   */
272  @Override
273  public Path getWorkingDirectory() {
274    return new Path(uri.toString());
275  }
276
277  @Override
278  public Path getInitialWorkingDirectory() {
279    return getWorkingDirectory();
280  }
281
282  @Override
283  public FsStatus getStatus(Path p) throws IOException {
284    return fs.getStatus(p);
285  }
286
  /**
   * Create the har-specific authority, of the form
   * underlyingfsscheme-host:port.
   * @param underLyingUri the URI of the underlying
   * filesystem
   * @return har specific auth
   */
294  private String getHarAuth(URI underLyingUri) {
295    String auth = underLyingUri.getScheme() + "-";
296    if (underLyingUri.getHost() != null) {
297      if (underLyingUri.getUserInfo() != null) {
298        auth += underLyingUri.getUserInfo();
299        auth += "@";
300      }
301      auth += underLyingUri.getHost();
302      if (underLyingUri.getPort() != -1) {
303        auth += ":";
304        auth +=  underLyingUri.getPort();
305      }
306    }
307    else {
308      auth += ":";
309    }
310    return auth;
311  }
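  // For example (hypothetical host name): an underlying URI of
  //   hdfs://namenode:8020  yields the authority "hdfs-namenode:8020",
  // while a host-less URI such as file:/// yields "file-:".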
312
313  /**
314   * Used for delegation token related functionality. Must delegate to
315   * underlying file system.
316   */
317  @Override
318  protected URI getCanonicalUri() {
319    return fs.getCanonicalUri();
320  }
321
322  @Override
323  protected URI canonicalizeUri(URI uri) {
324    return fs.canonicalizeUri(uri);
325  }
326
  /**
   * Returns the URI of this filesystem.
   * The URI is of the form
   * har://underlyingfsscheme-host:port/pathintheunderlyingfs
   */
332  @Override
333  public URI getUri() {
334    return this.uri;
335  }
336  
337  @Override
338  protected void checkPath(Path path) {
339    fs.checkPath(path);
340  }
341
342  @Override
343  public Path resolvePath(Path p) throws IOException {
344    return fs.resolvePath(p);
345  }
346
  /**
   * Returns the path inside the har filesystem, i.e. the
   * path relative to the archive root (rooted at "/").
   * @param path the fully qualified path in the har filesystem.
   * @return the path within the archive, or null if the path
   * lies outside the archive.
   */
355  private Path getPathInHar(Path path) {
356    Path harPath = new Path(path.toUri().getPath());
357    if (archivePath.compareTo(harPath) == 0)
358      return new Path(Path.SEPARATOR);
359    Path tmp = new Path(harPath.getName());
360    Path parent = harPath.getParent();
361    while (!(parent.compareTo(archivePath) == 0)) {
362      if (parent.toString().equals(Path.SEPARATOR)) {
363        tmp = null;
364        break;
365      }
366      tmp = new Path(parent.getName(), tmp);
367      parent = parent.getParent();
368    }
369    if (tmp != null) 
370      tmp = new Path(Path.SEPARATOR, tmp);
371    return tmp;
372  }
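  // For example (hypothetical paths): with archivePath = /user/alice/data.har,
  //   getPathInHar(/user/alice/data.har)          -> /
  //   getPathInHar(/user/alice/data.har/dir/file) -> /dir/file
  //   getPathInHar(/tmp/elsewhere)                -> null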
373  
  // Qualify the archive-relative path p against this filesystem:
  // strip the leading "/" and prepend scheme, authority and the
  // initial (archive) path. Parsing and string manipulation are
  // error prone, so just use the Path API to do it.
378  private Path makeRelative(String initial, Path p) {
379    String scheme = this.uri.getScheme();
380    String authority = this.uri.getAuthority();
381    Path root = new Path(Path.SEPARATOR);
382    if (root.compareTo(p) == 0)
383      return new Path(scheme, authority, initial);
384    Path retPath = new Path(p.getName());
385    Path parent = p.getParent();
386    for (int i=0; i < p.depth()-1; i++) {
387      retPath = new Path(parent.getName(), retPath);
388      parent = parent.getParent();
389    }
390    return new Path(new Path(scheme, authority, initial),
391      retPath.toString());
392  }
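  // For example (hypothetical names): if this filesystem's URI is
  //   har://hdfs-namenode:8020/user/alice/data.har
  // then makeRelative("/user/alice/data.har", new Path("/dir/file")) yields
  //   har://hdfs-namenode:8020/user/alice/data.har/dir/file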
393  
  /* this makes a path qualified in the har filesystem
   * (non-Javadoc)
   * @see org.apache.hadoop.fs.FileSystem#makeQualified(
   * org.apache.hadoop.fs.Path)
   */
399  @Override
400  public Path makeQualified(Path path) {
401    // make sure that we just get the 
402    // path component 
403    Path fsPath = path;
404    if (!path.isAbsolute()) {
405      fsPath = new Path(archivePath, path);
406    }
407
408    URI tmpURI = fsPath.toUri();
409    //change this to Har uri 
410    return new Path(uri.getScheme(), harAuth, tmpURI.getPath());
411  }
412
413  /**
414   * Fix offset and length of block locations.
415   * Note that this method modifies the original array.
416   * @param locations block locations of har part file
417   * @param start the start of the desired range in the contained file
418   * @param len the length of the desired range
419   * @param fileOffsetInHar the offset of the desired file in the har part file
420   * @return block locations with fixed offset and length
421   */  
422  static BlockLocation[] fixBlockLocations(BlockLocation[] locations,
423                                          long start,
424                                          long len,
425                                          long fileOffsetInHar) {
426    // offset 1 past last byte of desired range
427    long end = start + len;
428
429    for (BlockLocation location : locations) {
430      // offset of part block relative to beginning of desired file
431      // (may be negative if file starts in this part block)
432      long harBlockStart = location.getOffset() - fileOffsetInHar;
433      // offset 1 past last byte of har block relative to beginning of
434      // desired file
435      long harBlockEnd = harBlockStart + location.getLength();
436      
437      if (start > harBlockStart) {
438        // desired range starts after beginning of this har block
439        // fix offset to beginning of relevant range (relative to desired file)
440        location.setOffset(start);
441        // fix length to relevant portion of har block
442        location.setLength(location.getLength() - (start - harBlockStart));
443      } else {
444        // desired range includes beginning of this har block
445        location.setOffset(harBlockStart);
446      }
447      
448      if (harBlockEnd > end) {
449        // range ends before end of this har block
450        // fix length to remove irrelevant portion at the end
451        location.setLength(location.getLength() - (harBlockEnd - end));
452      }
453    }
454    
455    return locations;
456  }
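  // Worked example (illustrative numbers only): suppose the archived file
  // starts at offset 100 of the part file (fileOffsetInHar = 100), the part
  // file has one block at offset 0 with length 300, and the caller asks for
  // start = 0, len = 150 (so end = 150). Then harBlockStart = -100 and
  // harBlockEnd = 200; the first branch sets offset = 0 and
  // length = 300 - 100 = 200, and the trailing check trims the length to
  // 200 - (200 - 150) = 150, i.e. the block now describes bytes [0, 150)
  // of the archived file.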
457  
458  /**
459   * Get block locations from the underlying fs and fix their
460   * offsets and lengths.
461   * @param file the input file status to get block locations
462   * @param start the start of the desired range in the contained file
463   * @param len the length of the desired range
464   * @return block locations for this segment of file
465   * @throws IOException
466   */
467  @Override
468  public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
469                                               long len) throws IOException {
470    HarStatus hstatus = getFileHarStatus(file.getPath());
471    Path partPath = new Path(archivePath, hstatus.getPartName());
472    FileStatus partStatus = metadata.getPartFileStatus(partPath);
473
474    // get all part blocks that overlap with the desired file blocks
475    BlockLocation[] locations = 
476      fs.getFileBlockLocations(partStatus,
477                               hstatus.getStartIndex() + start, len);
478
479    return fixBlockLocations(locations, start, len, hstatus.getStartIndex());
480  }
481  
  /**
   * The hash of the path p inside the har filesystem.
   * @param p the path in the har filesystem
   * @return the hash code of the path.
   */
487  public static int getHarHash(Path p) {
488    return (p.toString().hashCode() & 0x7fffffff);
489  }
490  
491  static class Store {
492    public Store() {
493      begin = end = startHash = endHash = 0;
494    }
495    public Store(long begin, long end, int startHash, int endHash) {
496      this.begin = begin;
497      this.end = end;
498      this.startHash = startHash;
499      this.endHash = endHash;
500    }
501    public long begin;
502    public long end;
503    public int startHash;
504    public int endHash;
505  }
506  
  /**
   * Get filestatuses of all the children of a given directory. This just
   * scans every entry in the parsed archive index and keeps the immediate
   * children of the directory. It is a brute force way of getting all such
   * filestatuses.
   * 
   * @param parent
   *          the parent path directory
   * @param statuses
   *          the list to add the children filestatuses to
   */
517  private void fileStatusesInIndex(HarStatus parent, List<FileStatus> statuses)
518          throws IOException {
519    String parentString = parent.getName();
520    if (!parentString.endsWith(Path.SEPARATOR)){
521        parentString += Path.SEPARATOR;
522    }
523    Path harPath = new Path(parentString);
524    int harlen = harPath.depth();
525    final Map<String, FileStatus> cache = new TreeMap<String, FileStatus>();
526
527    for (HarStatus hstatus : metadata.archive.values()) {
528      String child = hstatus.getName();
529      if ((child.startsWith(parentString))) {
530        Path thisPath = new Path(child);
531        if (thisPath.depth() == harlen + 1) {
532          statuses.add(toFileStatus(hstatus, cache));
533        }
534      }
535    }
536  }
537
538  /**
539   * Combine the status stored in the index and the underlying status. 
540   * @param h status stored in the index
541   * @param cache caching the underlying file statuses
542   * @return the combined file status
543   * @throws IOException
544   */
545  private FileStatus toFileStatus(HarStatus h,
546      Map<String, FileStatus> cache) throws IOException {
547    FileStatus underlying = null;
548    if (cache != null) {
549      underlying = cache.get(h.partName);
550    }
551    if (underlying == null) {
552      final Path p = h.isDir? archivePath: new Path(archivePath, h.partName);
553      underlying = fs.getFileStatus(p);
554      if (cache != null) {
555        cache.put(h.partName, underlying);
556      }
557    }
558
559    long modTime = 0;
560    int version = metadata.getVersion();
561    if (version < 3) {
562      modTime = underlying.getModificationTime();
563    } else if (version == 3) {
564      modTime = h.getModificationTime();
565    }
566
567    return new FileStatus(
568        h.isDir()? 0L: h.getLength(),
569        h.isDir(),
570        underlying.getReplication(),
571        underlying.getBlockSize(),
572        modTime,
573        underlying.getAccessTime(),
574        underlying.getPermission(),
575        underlying.getOwner(),
576        underlying.getGroup(),
577        makeRelative(this.uri.getPath(), new Path(h.name)));
578  }
579
  // A parser for the status of one archived file or directory, which is
  // stored as a single line in the index files.
  // The format of a line is
  //   filename "dir"/"file" partFileName startIndex length
  //   <space separated children>
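  // For illustration only (hypothetical version-3 entries; names and the
  // trailing "modtime+permission+owner+group" properties are URL-encoded,
  // and a directory carries its properties in the partFileName slot):
  //   %2Fdir1 dir 1380270822000+493+alice+hadoop 0 0 fileA fileB
  //   %2Fdir1%2FfileA file part-0 0 1024 1380270822000+420+alice+hadoop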
585  private class HarStatus {
586    boolean isDir;
587    String name;
588    List<String> children;
589    String partName;
590    long startIndex;
591    long length;
592    long modificationTime = 0;
593
594    public HarStatus(String harString) throws UnsupportedEncodingException {
595      String[] splits = harString.split(" ");
596      this.name = decodeFileName(splits[0]);
      this.isDir = "dir".equals(splits[1]);
      // this is equal to "none" if it's a directory
599      this.partName = splits[2];
600      this.startIndex = Long.parseLong(splits[3]);
601      this.length = Long.parseLong(splits[4]);
602
603      int version = metadata.getVersion();
604      String[] propSplits = null;
605      // propSplits is used to retrieve the metainformation that Har versions
606      // 1 & 2 missed (modification time, permission, owner group).
607      // These fields are stored in an encoded string placed in different
608      // locations depending on whether it's a file or directory entry.
609      // If it's a directory, the string will be placed at the partName
610      // location (directories have no partName because they don't have data
611      // to be stored). This is done because the number of fields in a
612      // directory entry is unbounded (all children are listed at the end)
613      // If it's a file, the string will be the last field.
614      if (isDir) {
615        if (version == 3){
616          propSplits = decodeString(this.partName).split(" ");
617        }
618        children = new ArrayList<String>();
619        for (int i = 5; i < splits.length; i++) {
620          children.add(decodeFileName(splits[i]));
621        }
622      } else if (version == 3) {
623        propSplits = decodeString(splits[5]).split(" ");
624      }
625
626      if (propSplits != null && propSplits.length >= 4) {
627        modificationTime = Long.parseLong(propSplits[0]);
628        // the fields below are stored in the file but are currently not used
629        // by HarFileSystem
630        // permission = new FsPermission(Short.parseShort(propSplits[1]));
631        // owner = decodeString(propSplits[2]);
632        // group = decodeString(propSplits[3]);
633      }
634    }
635    public boolean isDir() {
636      return isDir;
637    }
638    
639    public String getName() {
640      return name;
641    }
642    public String getPartName() {
643      return partName;
644    }
645    public long getStartIndex() {
646      return startIndex;
647    }
648    public long getLength() {
649      return length;
650    }
651    public long getModificationTime() {
652      return modificationTime;
653    }
654  }
655  
  /**
   * Return the filestatus of a file in the har archive.
   * The permissions returned are those of the archive
   * index files; permissions are not persisted
   * while creating a hadoop archive.
   * @param f the path in har filesystem
   * @return filestatus.
   * @throws IOException
   */
665  @Override
666  public FileStatus getFileStatus(Path f) throws IOException {
667    HarStatus hstatus = getFileHarStatus(f);
668    return toFileStatus(hstatus, null);
669  }
670
671  private HarStatus getFileHarStatus(Path f) throws IOException {
672    // get the fs DataInputStream for the underlying file
673    // look up the index.
674    Path p = makeQualified(f);
675    Path harPath = getPathInHar(p);
676    if (harPath == null) {
677      throw new IOException("Invalid file name: " + f + " in " + uri);
678    }
679    HarStatus hstatus = metadata.archive.get(harPath);
680    if (hstatus == null) {
681      throw new FileNotFoundException("File: " +  f + " does not exist in " + uri);
682    }
683    return hstatus;
684  }
685
686  /**
687   * @return null since no checksum algorithm is implemented.
688   */
689  @Override
690  public FileChecksum getFileChecksum(Path f, long length) {
691    return null;
692  }
693
694  /**
695   * Returns a har input stream which fakes end of 
696   * file. It reads the index files to get the part 
697   * file name and the size and start of the file.
698   */
699  @Override
700  public FSDataInputStream open(Path f, int bufferSize) throws IOException {
701    // get the fs DataInputStream for the underlying file
702    HarStatus hstatus = getFileHarStatus(f);
703    if (hstatus.isDir()) {
704      throw new FileNotFoundException(f + " : not a file in " +
705                archivePath);
706    }
707    return new HarFSDataInputStream(fs, new Path(archivePath, 
708        hstatus.getPartName()),
709        hstatus.getStartIndex(), hstatus.getLength(), bufferSize);
710  }
711
712  /**
713   * Used for delegation token related functionality. Must delegate to
714   * underlying file system.
715   */
716  @Override
717  public FileSystem[] getChildFileSystems() {
718    return new FileSystem[]{fs};
719  }
720
721  @Override
722  public FSDataOutputStream create(Path f, FsPermission permission,
723      boolean overwrite, int bufferSize, short replication, long blockSize,
724      Progressable progress) throws IOException {
725    throw new IOException("Har: create not allowed.");
726  }
727
728  @SuppressWarnings("deprecation")
729  @Override
730  public FSDataOutputStream createNonRecursive(Path f, boolean overwrite,
731      int bufferSize, short replication, long blockSize, Progressable progress)
732      throws IOException {
733    throw new IOException("Har: create not allowed.");
734  }
735
736  @Override
737  public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException {
738    throw new IOException("Har: append not allowed.");
739  }
740
741  @Override
742  public void close() throws IOException {
743    super.close();
744    if (fs != null) {
745      try {
746        fs.close();
747      } catch(IOException ie) {
748        //this might already be closed
749        // ignore
750      }
751    }
752  }
753  
754  /**
755   * Not implemented.
756   */
757  @Override
758  public boolean setReplication(Path src, short replication) throws IOException{
759    throw new IOException("Har: setReplication not allowed");
760  }
761
762  @Override
763  public boolean rename(Path src, Path dst) throws IOException {
764    throw new IOException("Har: rename not allowed");
765  }
766
767  @Override
768  public FSDataOutputStream append(Path f) throws IOException {
769    throw new IOException("Har: append not allowed");
770  }
771
772  /**
773   * Not implemented.
774   */
775  @Override
776  public boolean delete(Path f, boolean recursive) throws IOException { 
777    throw new IOException("Har: delete not allowed");
778  }
779
  /**
   * listStatus returns the children of a directory
   * by looking them up in the archive index.
   */
784  @Override
785  public FileStatus[] listStatus(Path f) throws IOException {
    // look the requested path up in the archive index and
    // create synthetic filestatuses to return
    // to the client
790    List<FileStatus> statuses = new ArrayList<FileStatus>();
791    Path tmpPath = makeQualified(f);
792    Path harPath = getPathInHar(tmpPath);
793    HarStatus hstatus = metadata.archive.get(harPath);
794    if (hstatus == null) {
795      throw new FileNotFoundException("File " + f + " not found in " + archivePath);
796    }
797    if (hstatus.isDir()) {
798      fileStatusesInIndex(hstatus, statuses);
799    } else {
800      statuses.add(toFileStatus(hstatus, null));
801    }
802    
803    return statuses.toArray(new FileStatus[statuses.size()]);
804  }
805  
806  /**
807   * return the top level archive path.
808   */
809  @Override
810  public Path getHomeDirectory() {
811    return new Path(uri.toString());
812  }
813
814  @Override
815  public void setWorkingDirectory(Path newDir) {
816    //does nothing.
817  }
818  
819  /**
820   * not implemented.
821   */
822  @Override
823  public boolean mkdirs(Path f, FsPermission permission) throws IOException {
824    throw new IOException("Har: mkdirs not allowed");
825  }
826  
827  /**
828   * not implemented.
829   */
830  @Override
831  public void copyFromLocalFile(boolean delSrc, boolean overwrite,
832      Path src, Path dst) throws IOException {
833    throw new IOException("Har: copyfromlocalfile not allowed");
834  }
835
836  @Override
837  public void copyFromLocalFile(boolean delSrc, boolean overwrite,
838      Path[] srcs, Path dst) throws IOException {
839    throw new IOException("Har: copyfromlocalfile not allowed");
840  }
841
842  /**
843   * copies the file in the har filesystem to a local file.
844   */
845  @Override
846  public void copyToLocalFile(boolean delSrc, Path src, Path dst) 
847    throws IOException {
848    FileUtil.copy(this, src, getLocal(getConf()), dst, false, getConf());
849  }
850  
851  /**
852   * not implemented.
853   */
854  @Override
855  public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) 
856    throws IOException {
857    throw new IOException("Har: startLocalOutput not allowed");
858  }
859  
860  /**
861   * not implemented.
862   */
863  @Override
864  public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) 
865    throws IOException {
866    throw new IOException("Har: completeLocalOutput not allowed");
867  }
868  
869  /**
870   * not implemented.
871   */
872  @Override
873  public void setOwner(Path p, String username, String groupname)
874    throws IOException {
875    throw new IOException("Har: setowner not allowed");
876  }
877
878  @Override
879  public void setTimes(Path p, long mtime, long atime) throws IOException {
880    throw new IOException("Har: setTimes not allowed");
881  }
882
883  /**
884   * Not implemented.
885   */
886  @Override
887  public void setPermission(Path p, FsPermission permission)
888    throws IOException {
889    throw new IOException("Har: setPermission not allowed");
890  }
891  
  /**
   * Hadoop archives input stream. This input stream fakes EOF
   * because each archived file is only a region of a bigger part file.
   */
896  private static class HarFSDataInputStream extends FSDataInputStream {
897    /**
898     * Create an input stream that fakes all the reads/positions/seeking.
899     */
900    private static class HarFsInputStream extends FSInputStream
901        implements CanSetDropBehind, CanSetReadahead {
902      private long position, start, end;
903      //The underlying data input stream that the
904      // underlying filesystem will return.
905      private final FSDataInputStream underLyingStream;
906      //one byte buffer
907      private final byte[] oneBytebuff = new byte[1];
908      
909      HarFsInputStream(FileSystem fs, Path path, long start,
910          long length, int bufferSize) throws IOException {
911        if (length < 0) {
912          throw new IllegalArgumentException("Negative length ["+length+"]");
913        }
914        underLyingStream = fs.open(path, bufferSize);
915        underLyingStream.seek(start);
916        // the start of this file in the part file
917        this.start = start;
918        // the position pointer in the part file
919        this.position = start;
920        // the end pointer in the part file
921        this.end = start + length;
922      }
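      // For example (illustrative numbers): an archived file occupying bytes
      // [4096, 4196) of the part file is opened with start = 4096 and
      // length = 100, so end = 4196 and getPos() reports 0 right after open.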
923      
924      @Override
925      public synchronized int available() throws IOException {
926        long remaining = end - underLyingStream.getPos();
927        if (remaining > Integer.MAX_VALUE) {
928          return Integer.MAX_VALUE;
929        }
930        return (int) remaining;
931      }
932      
933      @Override
934      public synchronized  void close() throws IOException {
935        underLyingStream.close();
936        super.close();
937      }
938      
939      //not implemented
940      @Override
941      public void mark(int readLimit) {
942        // do nothing 
943      }
944      
945      /**
946       * reset is not implemented
947       */
948      @Override
949      public void reset() throws IOException {
950        throw new IOException("reset not implemented.");
951      }
952      
953      @Override
954      public synchronized int read() throws IOException {
955        int ret = read(oneBytebuff, 0, 1);
956        return (ret <= 0) ? -1: (oneBytebuff[0] & 0xff);
957      }
958      
      // NB: currently this method is never actually executed because
      // java.io.DataInputStream.read(byte[]) directly delegates to
      // java.io.InputStream.read(byte[], int, int).
      // However, it can potentially be invoked, so leave it intact for now.
963      @Override
964      public synchronized int read(byte[] b) throws IOException {
965        final int ret = read(b, 0, b.length);
966        return ret;
967      }
968      
      /**
       * Reads up to len bytes, but never past the logical end of
       * the archived file.
       */
      @Override
      public synchronized int read(byte[] b, int offset, int len) 
        throws IOException {
        int newlen = len;
        int ret = -1;
        if (position + len > end) {
          newlen = (int) (end - position);
        }
        // nothing left to read before the logical EOF
        if (newlen == 0)
          return ret;
        ret = underLyingStream.read(b, offset, newlen);
        // advance only by the bytes actually read; ret may be -1 at
        // the underlying stream's EOF
        if (ret > 0) {
          position += ret;
        }
        return ret;
      }
987      
988      @Override
989      public synchronized long skip(long n) throws IOException {
990        long tmpN = n;
991        if (tmpN > 0) {
992          final long actualRemaining = end - position; 
993          if (tmpN > actualRemaining) {
994            tmpN = actualRemaining;
995          }   
996          underLyingStream.seek(tmpN + position);
997          position += tmpN;
998          return tmpN;
999        }   
1000        // NB: the contract is described in java.io.InputStream.skip(long):
1001        // this method returns the number of bytes actually skipped, so,
1002        // the return value should never be negative. 
1003        return 0;
1004      }   
1005      
1006      @Override
1007      public synchronized long getPos() throws IOException {
1008        return (position - start);
1009      }
1010      
1011      @Override
1012      public synchronized void seek(final long pos) throws IOException {
1013        validatePosition(pos);
1014        position = start + pos;
1015        underLyingStream.seek(position);
1016      }
1017
      private void validatePosition(final long pos) throws IOException {
        if (pos < 0) {
          throw new IOException("Negative position: " + pos);
        }
        final long length = end - start;
        if (pos > length) {
          throw new IOException("Position past the end " +
              "of the stream (length = " + length + "): " + pos);
        }
      }
1028
1029      @Override
1030      public boolean seekToNewSource(long targetPos) throws IOException {
        // no need to implement this:
        // HDFS itself does seekToNewSource
        // while reading.
1034        return false;
1035      }
1036      
      /**
       * Implements positioned read (PositionedReadable).
       */
1040      @Override
1041      public int read(long pos, byte[] b, int offset, int length) 
1042      throws IOException {
1043        int nlength = length;
1044        if (start + nlength + pos > end) {
1045          // length corrected to the real remaining length:
1046          nlength = (int) (end - start - pos);
1047        }
1048        if (nlength <= 0) {
1049          // EOS:
1050          return -1;
1051        }
1052        return underLyingStream.read(pos + start , b, offset, nlength);
1053      }
1054      
      /**
       * Positioned readFully.
       */
1058      @Override
1059      public void readFully(long pos, byte[] b, int offset, int length) 
1060      throws IOException {
1061        if (start + length + pos > end) {
1062          throw new IOException("Not enough bytes to read.");
1063        }
1064        underLyingStream.readFully(pos + start, b, offset, length);
1065      }
1066      
1067      @Override
1068      public void readFully(long pos, byte[] b) throws IOException {
1069          readFully(pos, b, 0, b.length);
1070      }
1071
1072      @Override
1073      public void setReadahead(Long readahead) throws IOException {
1074        underLyingStream.setReadahead(readahead);
1075      }
1076
1077      @Override
1078      public void setDropBehind(Boolean dropBehind) throws IOException {
1079        underLyingStream.setDropBehind(dropBehind);
1080      }
1081    }
1082  
    /**
     * Constructor for the har input stream.
     * @param fs the underlying filesystem
     * @param p the path in the underlying filesystem
     * @param start the start position in the part file
     * @param length the length of valid data in the part file
     * @param bufsize the buffer size
     * @throws IOException
     */
1092    public HarFSDataInputStream(FileSystem fs, Path  p, long start, 
1093        long length, int bufsize) throws IOException {
1094        super(new HarFsInputStream(fs, p, start, length, bufsize));
1095    }
1096  }
1097
1098  private class HarMetaData {
1099    private FileSystem fs;
1100    private int version;
1101    // the masterIndex of the archive
1102    private Path masterIndexPath;
1103    // the index file 
1104    private Path archiveIndexPath;
1105
1106    private long masterIndexTimestamp;
1107    private long archiveIndexTimestamp;
1108
1109    List<Store> stores = new ArrayList<Store>();
1110    Map<Path, HarStatus> archive = new HashMap<Path, HarStatus>();
1111    private Map<Path, FileStatus> partFileStatuses = new HashMap<Path, FileStatus>();
1112
1113    public HarMetaData(FileSystem fs, Path masterIndexPath, Path archiveIndexPath) {
1114      this.fs = fs;
1115      this.masterIndexPath = masterIndexPath;
1116      this.archiveIndexPath = archiveIndexPath;
1117    }
1118
1119    public FileStatus getPartFileStatus(Path partPath) throws IOException {
1120      FileStatus status;
1121      status = partFileStatuses.get(partPath);
1122      if (status == null) {
1123        status = fs.getFileStatus(partPath);
1124        partFileStatuses.put(partPath, status);
1125      }
1126      return status;
1127    }
1128
1129    public long getMasterIndexTimestamp() {
1130      return masterIndexTimestamp;
1131    }
1132
1133    public long getArchiveIndexTimestamp() {
1134      return archiveIndexTimestamp;
1135    }
1136
1137    private int getVersion() {
1138      return version;
1139    }
1140
1141    private void parseMetaData() throws IOException {
1142      Text line = new Text();
1143      long read;
1144      FSDataInputStream in = null;
1145      LineReader lin = null;
1146
1147      try {
1148        in = fs.open(masterIndexPath);
1149        FileStatus masterStat = fs.getFileStatus(masterIndexPath);
1150        masterIndexTimestamp = masterStat.getModificationTime();
1151        lin = new LineReader(in, getConf());
1152        read = lin.readLine(line);
1153
1154        // the first line contains the version of the index file
1155        String versionLine = line.toString();
1156        String[] arr = versionLine.split(" ");
1157        version = Integer.parseInt(arr[0]);
        // accept older index versions, but reject anything newer
1159        if (this.version > HarFileSystem.VERSION) {
1160          throw new IOException("Invalid version " + 
1161              this.version + " expected " + HarFileSystem.VERSION);
1162        }
1163
        // each subsequent line contains a hashcode range and the
        // begin/end offsets of that range within the archive index file
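        // For illustration only (hypothetical contents of _masterindex):
        //   3
        //   0 1756908080 0 1957
        // i.e. version 3, followed by one store covering hash codes
        // [0, 1756908080] whose entries occupy bytes [0, 1957) of _index.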
1165        String[] readStr;
1166        while(read < masterStat.getLen()) {
1167          int b = lin.readLine(line);
1168          read += b;
1169          readStr = line.toString().split(" ");
1170          int startHash = Integer.parseInt(readStr[0]);
1171          int endHash  = Integer.parseInt(readStr[1]);
1172          stores.add(new Store(Long.parseLong(readStr[2]), 
1173              Long.parseLong(readStr[3]), startHash,
1174              endHash));
1175          line.clear();
1176        }
1177      } catch (IOException ioe) {
1178        LOG.warn("Encountered exception ", ioe);
1179        throw ioe;
1180      } finally {
1181        IOUtils.cleanup(LOG, lin, in);
1182      }
1183
1184      FSDataInputStream aIn = fs.open(archiveIndexPath);
1185      try {
1186        FileStatus archiveStat = fs.getFileStatus(archiveIndexPath);
1187        archiveIndexTimestamp = archiveStat.getModificationTime();
1188        LineReader aLin;
1189
1190        // now start reading the real index file
1191        for (Store s: stores) {
1192          read = 0;
1193          aIn.seek(s.begin);
1194          aLin = new LineReader(aIn, getConf());
1195          while (read + s.begin < s.end) {
1196            int tmp = aLin.readLine(line);
1197            read += tmp;
1198            String lineFeed = line.toString();
1199            String[] parsed = lineFeed.split(" ");
1200            parsed[0] = decodeFileName(parsed[0]);
1201            archive.put(new Path(parsed[0]), new HarStatus(lineFeed));
1202            line.clear();
1203          }
1204        }
1205      } finally {
1206        IOUtils.cleanup(LOG, aIn);
1207      }
1208    }
1209  }
1210  
  /*
   * For testing purposes only.
   */
1214  HarMetaData getMetadata() {
1215    return metadata;
1216  }
1217
1218  private static class LruCache<K, V> extends LinkedHashMap<K, V> {
1219    private final int MAX_ENTRIES;
1220
1221    public LruCache(int maxEntries) {
1222        super(maxEntries + 1, 1.0f, true);
1223        MAX_ENTRIES = maxEntries;
1224    }
1225
1226    @Override
1227    protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
1228        return size() > MAX_ENTRIES;
1229    }
1230  }
1231
1232  @SuppressWarnings("deprecation")
1233  @Override
1234  public FsServerDefaults getServerDefaults() throws IOException {
1235    return fs.getServerDefaults();
1236  }
1237
1238  @Override
1239  public FsServerDefaults getServerDefaults(Path f) throws IOException {
1240    return fs.getServerDefaults(f);
1241  }
1242
1243  @Override
1244  public long getUsed() throws IOException{
1245    return fs.getUsed();
1246  }
1247
1248  @SuppressWarnings("deprecation")
1249  @Override
1250  public long getDefaultBlockSize() {
1251    return fs.getDefaultBlockSize();
1252  }
1253
1254  @SuppressWarnings("deprecation")
1255  @Override
1256  public long getDefaultBlockSize(Path f) {
1257    return fs.getDefaultBlockSize(f);
1258  }
1259
1260  @SuppressWarnings("deprecation")
1261  @Override
1262  public short getDefaultReplication() {
1263    return fs.getDefaultReplication();
1264  }
1265
1266  @Override
1267  public short getDefaultReplication(Path f) {
1268    return fs.getDefaultReplication(f);
1269  }
1270}