/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.fs;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;
import org.apache.hadoop.util.Progressable;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.util.*;

/**
 * This is an implementation of the Hadoop Archive
 * Filesystem. This archive Filesystem has index files
 * of the form _index* and has contents of the form
 * part-*. The index files store the indexes of the
 * real files. The index files are of the form _masterindex
 * and _index. The master index is a level of indirection
 * into the index file that makes lookups faster. The index
 * file is sorted by the hash code of the paths that it contains,
 * and the master index contains pointers to the positions in
 * the index for ranges of hash codes.
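 *
 * <p>A minimal usage sketch (the paths and host names below are
 * hypothetical, not taken from this class): a har URI names the archive in
 * the underlying filesystem plus a path inside the archive, and the
 * standard FileSystem API is used to read from it.
 * <pre>
 *   Configuration conf = new Configuration();
 *   // file dir/data.txt stored inside /user/alice/foo.har on HDFS
 *   Path p = new Path("har://hdfs-namenode:8020/user/alice/foo.har/dir/data.txt");
 *   FileSystem harFs = p.getFileSystem(conf);   // a HarFileSystem instance
 *   FSDataInputStream in = harFs.open(p);       // reads bytes from a part-* file
 *   in.close();
 * </pre>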
 */

public class HarFileSystem extends FileSystem {

  private static final Log LOG = LogFactory.getLog(HarFileSystem.class);

  public static final String METADATA_CACHE_ENTRIES_KEY = "fs.har.metadatacache.entries";
  public static final int METADATA_CACHE_ENTRIES_DEFAULT = 10;
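  // The metadata cache size can be tuned through the configuration, e.g.
  // (illustrative): conf.setInt(METADATA_CACHE_ENTRIES_KEY, 50);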

  public static final int VERSION = 3;

  private static Map<URI, HarMetaData> harMetaCache;

  // uri representation of this Har filesystem
  private URI uri;
  // the top level path of the archive
  // in the underlying file system
  private Path archivePath;
  // the har auth
  private String harAuth;

  // pointer into the static metadata cache
  private HarMetaData metadata;

  private FileSystem fs;

  /**
   * Public no-arg constructor.
   */
  public HarFileSystem() {
    // Must call #initialize() method to set the underlying file system
  }

  /**
   * Return the protocol scheme for the FileSystem.
   * <p/>
   *
   * @return <code>har</code>
   */
  @Override
  public String getScheme() {
    return "har";
  }

  /**
   * Constructor to create a HarFileSystem with an
   * underlying filesystem.
   * @param fs underlying file system
   */
  public HarFileSystem(FileSystem fs) {
    this.fs = fs;
    this.statistics = fs.statistics;
  }

  private synchronized void initializeMetadataCache(Configuration conf) {
    if (harMetaCache == null) {
      int cacheSize = conf.getInt(METADATA_CACHE_ENTRIES_KEY, METADATA_CACHE_ENTRIES_DEFAULT);
      harMetaCache = Collections.synchronizedMap(new LruCache<URI, HarMetaData>(cacheSize));
    }
  }

  /**
   * Initialize a Har filesystem per har archive. The
   * archive home directory is the top level directory
   * in the filesystem that contains the HAR archive.
   * Be careful with this method: you do not want to keep
   * creating new filesystem instances on every call to
   * path.getFileSystem().
   * The har URI is of the form
   * har://underlyingfsscheme-host:port/archivepath
   * or
   * har:///archivepath, in which case the default filesystem
   * configured for the cluster is used as the underlying filesystem.
   */
  @Override
  public void initialize(URI name, Configuration conf) throws IOException {
    // initialize the metadata cache, if needed
    initializeMetadataCache(conf);

    // decode the name
    URI underLyingURI = decodeHarURI(name, conf);
    // we got the right har path - now check if this is
    // truly a har filesystem
    Path harPath = archivePath(
      new Path(name.getScheme(), name.getAuthority(), name.getPath()));
    if (harPath == null) {
      throw new IOException("Invalid path for the Har Filesystem. " +
                           name.toString());
    }
    if (fs == null) {
      fs = FileSystem.get(underLyingURI, conf);
    }
    uri = harPath.toUri();
    archivePath = new Path(uri.getPath());
    harAuth = getHarAuth(underLyingURI);
    // check that the underlying fs contains
    // the index files
    Path masterIndexPath = new Path(archivePath, "_masterindex");
    Path archiveIndexPath = new Path(archivePath, "_index");
    if (!fs.exists(masterIndexPath) || !fs.exists(archiveIndexPath)) {
      throw new IOException("Invalid path for the Har Filesystem. " +
          "No index file in " + harPath);
    }

    metadata = harMetaCache.get(uri);
    if (metadata != null) {
      FileStatus mStat = fs.getFileStatus(masterIndexPath);
      FileStatus aStat = fs.getFileStatus(archiveIndexPath);
      if (mStat.getModificationTime() != metadata.getMasterIndexTimestamp() ||
          aStat.getModificationTime() != metadata.getArchiveIndexTimestamp()) {
        // the archive has been overwritten since we last read it
        // remove the entry from the meta data cache
        metadata = null;
        harMetaCache.remove(uri);
      }
    }
    if (metadata == null) {
      metadata = new HarMetaData(fs, masterIndexPath, archiveIndexPath);
      metadata.parseMetaData();
      harMetaCache.put(uri, metadata);
    }
  }

  @Override
  public Configuration getConf() {
    return fs.getConf();
  }

  // get the version of the filesystem from the masterindex file
  // the version is currently not useful since it is the first version
  // of archives
  public int getHarVersion() throws IOException {
    if (metadata != null) {
      return metadata.getVersion();
    }
    else {
      throw new IOException("Invalid meta data for the Har Filesystem");
    }
  }

  /*
   * Find the parent path that is the
   * archive path within the given path. The deepest
   * path component that ends with .har determines
   * the path that will be returned.
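   * For example (illustrative): for /user/alice/foo.har/dir/file this
   * returns /user/alice/foo.har; if no component ends with .har, null
   * is returned.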
   */
  private Path archivePath(Path p) {
    Path retPath = null;
    Path tmp = p;
    for (int i=0; i< p.depth(); i++) {
      if (tmp.toString().endsWith(".har")) {
        retPath = tmp;
        break;
      }
      tmp = tmp.getParent();
    }
    return retPath;
  }

  /**
   * Decode the raw URI to get the URI of the underlying filesystem.
   * @param rawURI raw Har URI
   * @param conf the configuration, used to look up the default filesystem
   * @return filtered URI of the underlying fileSystem
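   *
   * <p>For example (with illustrative host names):
   * har://hdfs-namenode:8020/user/alice/foo.har decodes to
   * hdfs://namenode:8020/user/alice/foo.har, while har:///foo.har (no
   * authority) decodes to the default filesystem URI from the configuration.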
   */
  private URI decodeHarURI(URI rawURI, Configuration conf) throws IOException {
    String tmpAuth = rawURI.getAuthority();
    // we are using the default filesystem
    // in the config,
    // so create an underlying uri and
    // return it
    if (tmpAuth == null) {
      // create a path
      return FileSystem.getDefaultUri(conf);
    }
    String authority = rawURI.getAuthority();

    int i = authority.indexOf('-');
    if (i < 0) {
      throw new IOException("URI: " + rawURI
          + " is an invalid Har URI since '-' not found."
          + "  Expecting har://<scheme>-<host>/<path>.");
    }

    if (rawURI.getQuery() != null) {
      // query component not allowed
      throw new IOException("query component in Path not supported  " + rawURI);
    }

    URI tmp;
    try {
      // convert <scheme>-<host> to <scheme>://<host>
      URI baseUri = new URI(authority.replaceFirst("-", "://"));

      tmp = new URI(baseUri.getScheme(), baseUri.getAuthority(),
            rawURI.getPath(), rawURI.getQuery(), rawURI.getFragment());
    } catch (URISyntaxException e) {
      throw new IOException("URI: " + rawURI
          + " is an invalid Har URI. Expecting har://<scheme>-<host>/<path>.", e);
    }
    return tmp;
  }

  private static String decodeString(String str)
    throws UnsupportedEncodingException {
    return URLDecoder.decode(str, "UTF-8");
  }

  private String decodeFileName(String fname)
    throws UnsupportedEncodingException {
    int version = metadata.getVersion();
    if (version == 2 || version == 3){
      return decodeString(fname);
    }
    return fname;
  }

  /**
   * Return the top level archive path.
   */
  @Override
  public Path getWorkingDirectory() {
    return new Path(uri.toString());
  }

  @Override
  public Path getInitialWorkingDirectory() {
    return getWorkingDirectory();
  }

  @Override
  public FsStatus getStatus(Path p) throws IOException {
    return fs.getStatus(p);
  }

  /**
   * Create a har specific auth of the form
   * underlyingfsscheme-host:port
   * @param underLyingUri the uri of underlying
   * filesystem
   * @return har specific auth
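   *
   * <p>For example (illustrative): for hdfs://namenode:8020 this returns
   * "hdfs-namenode:8020"; for a URI with no host (e.g. file:///) it
   * returns "file-:".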
   */
  private String getHarAuth(URI underLyingUri) {
    String auth = underLyingUri.getScheme() + "-";
    if (underLyingUri.getHost() != null) {
      if (underLyingUri.getUserInfo() != null) {
        auth += underLyingUri.getUserInfo();
        auth += "@";
      }
      auth += underLyingUri.getHost();
      if (underLyingUri.getPort() != -1) {
        auth += ":";
        auth +=  underLyingUri.getPort();
      }
    }
    else {
      auth += ":";
    }
    return auth;
  }

  /**
   * Used for delegation token related functionality. Must delegate to
   * underlying file system.
   */
  @Override
  protected URI getCanonicalUri() {
    return fs.getCanonicalUri();
  }

  @Override
  protected URI canonicalizeUri(URI uri) {
    return fs.canonicalizeUri(uri);
  }

  /**
   * Returns the uri of this filesystem.
   * The uri is of the form
   * har://underlyingfsscheme-host:port/pathintheunderlyingfs
   */
  @Override
  public URI getUri() {
    return this.uri;
  }

  @Override
  protected void checkPath(Path path) {
    fs.checkPath(path);
  }

  @Override
  public Path resolvePath(Path p) throws IOException {
    return fs.resolvePath(p);
  }

  /**
   * Returns the path inside the har filesystem, i.e. the
   * path relative to the root of the archive.
   * @param path the fully qualified path in the har filesystem.
   * @return relative path in the filesystem.
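   *
   * <p>For example (illustrative): with archivePath /user/alice/foo.har,
   * har://hdfs-namenode:8020/user/alice/foo.har/dir/data.txt maps to
   * /dir/data.txt; the archive root itself maps to /.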
   */
  private Path getPathInHar(Path path) {
    Path harPath = new Path(path.toUri().getPath());
    if (archivePath.compareTo(harPath) == 0)
      return new Path(Path.SEPARATOR);
    Path tmp = new Path(harPath.getName());
    Path parent = harPath.getParent();
    while (!(parent.compareTo(archivePath) == 0)) {
      if (parent.toString().equals(Path.SEPARATOR)) {
        tmp = null;
        break;
      }
      tmp = new Path(parent.getName(), tmp);
      parent = parent.getParent();
    }
    if (tmp != null)
      tmp = new Path(Path.SEPARATOR, tmp);
    return tmp;
  }

  // Resolve p (a path inside the archive, beginning with /) against the
  // given initial path and qualify it with this filesystem's scheme and
  // authority. Parsing and doing string manipulation is not good, so
  // just use the Path API to do it.
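  // For example (illustrative): makeRelative("/user/alice/foo.har",
  // new Path("/dir/data.txt")) yields
  // har://hdfs-namenode:8020/user/alice/foo.har/dir/data.txt when this
  // filesystem's authority is hdfs-namenode:8020.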
  private Path makeRelative(String initial, Path p) {
    String scheme = this.uri.getScheme();
    String authority = this.uri.getAuthority();
    Path root = new Path(Path.SEPARATOR);
    if (root.compareTo(p) == 0)
      return new Path(scheme, authority, initial);
    Path retPath = new Path(p.getName());
    Path parent = p.getParent();
    for (int i=0; i < p.depth()-1; i++) {
      retPath = new Path(parent.getName(), retPath);
      parent = parent.getParent();
    }
    return new Path(new Path(scheme, authority, initial),
      retPath.toString());
  }

  /* this makes a path qualified in the har filesystem
   * (non-Javadoc)
   * @see org.apache.hadoop.fs.FilterFileSystem#makeQualified(
   * org.apache.hadoop.fs.Path)
   */
  @Override
  public Path makeQualified(Path path) {
    // make sure that we just get the
    // path component
    Path fsPath = path;
    if (!path.isAbsolute()) {
      fsPath = new Path(archivePath, path);
    }

    URI tmpURI = fsPath.toUri();
    // change this to Har uri
    return new Path(uri.getScheme(), harAuth, tmpURI.getPath());
  }

  /**
   * Fix offset and length of block locations.
   * Note that this method modifies the original array.
   * @param locations block locations of har part file
   * @param start the start of the desired range in the contained file
   * @param len the length of the desired range
   * @param fileOffsetInHar the offset of the desired file in the har part file
   * @return block locations with fixed offset and length
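   *
   * <p>A worked example (with made-up numbers): suppose the contained file
   * starts at fileOffsetInHar = 1000 within the part file and the caller
   * asks for start = 0, len = 500. A part block covering part-file bytes
   * [0, 2048) has harBlockStart = -1000 and harBlockEnd = 1048 relative to
   * the contained file, so its offset is fixed to 0 and its length is
   * trimmed to 500.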
   */
  static BlockLocation[] fixBlockLocations(BlockLocation[] locations,
                                          long start,
                                          long len,
                                          long fileOffsetInHar) {
    // offset 1 past last byte of desired range
    long end = start + len;

    for (BlockLocation location : locations) {
      // offset of part block relative to beginning of desired file
      // (may be negative if file starts in this part block)
      long harBlockStart = location.getOffset() - fileOffsetInHar;
      // offset 1 past last byte of har block relative to beginning of
      // desired file
      long harBlockEnd = harBlockStart + location.getLength();

      if (start > harBlockStart) {
        // desired range starts after beginning of this har block
        // fix offset to beginning of relevant range (relative to desired file)
        location.setOffset(start);
        // fix length to relevant portion of har block
        location.setLength(location.getLength() - (start - harBlockStart));
      } else {
        // desired range includes beginning of this har block
        location.setOffset(harBlockStart);
      }

      if (harBlockEnd > end) {
        // range ends before end of this har block
        // fix length to remove irrelevant portion at the end
        location.setLength(location.getLength() - (harBlockEnd - end));
      }
    }

    return locations;
  }

  /**
   * Get block locations from the underlying fs and fix their
   * offsets and lengths.
   * @param file the input file status to get block locations
   * @param start the start of the desired range in the contained file
   * @param len the length of the desired range
   * @return block locations for this segment of file
   * @throws IOException
   */
  @Override
  public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
                                               long len) throws IOException {
    HarStatus hstatus = getFileHarStatus(file.getPath());
    Path partPath = new Path(archivePath, hstatus.getPartName());
    FileStatus partStatus = metadata.getPartFileStatus(partPath);

    // get all part blocks that overlap with the desired file blocks
    BlockLocation[] locations =
      fs.getFileBlockLocations(partStatus,
                               hstatus.getStartIndex() + start, len);

    return fixBlockLocations(locations, start, len, hstatus.getStartIndex());
  }

  /**
   * The hash of the path p inside the filesystem
   * @param p the path in the harfilesystem
   * @return the hash code of the path.
   */
  public static int getHarHash(Path p) {
    return (p.toString().hashCode() & 0x7fffffff);
  }

  static class Store {
    public Store(long begin, long end) {
      this.begin = begin;
      this.end = end;
    }
    public long begin;
    public long end;
  }

  /**
   * Get filestatuses of all the children of a given directory. This just
   * scans all the entries parsed from the index file to find the statuses
   * of the children of a directory. It is a brute-force way of getting all
   * such filestatuses.
   *
   * @param parent
   *          the parent path directory
   * @param statuses
   *          the list to add the children filestatuses to
   */
  private void fileStatusesInIndex(HarStatus parent, List<FileStatus> statuses)
          throws IOException {
    String parentString = parent.getName();
    if (!parentString.endsWith(Path.SEPARATOR)){
        parentString += Path.SEPARATOR;
    }
    Path harPath = new Path(parentString);
    int harlen = harPath.depth();
    final Map<String, FileStatus> cache = new TreeMap<String, FileStatus>();

    for (HarStatus hstatus : metadata.archive.values()) {
      String child = hstatus.getName();
      if (child.startsWith(parentString)) {
        Path thisPath = new Path(child);
        if (thisPath.depth() == harlen + 1) {
          statuses.add(toFileStatus(hstatus, cache));
        }
      }
    }
  }

  /**
   * Combine the status stored in the index and the underlying status.
   * @param h status stored in the index
   * @param cache caching the underlying file statuses
   * @return the combined file status
   * @throws IOException
   */
  private FileStatus toFileStatus(HarStatus h,
      Map<String, FileStatus> cache) throws IOException {
    FileStatus underlying = null;
    if (cache != null) {
      underlying = cache.get(h.partName);
    }
    if (underlying == null) {
      final Path p = h.isDir? archivePath: new Path(archivePath, h.partName);
      underlying = fs.getFileStatus(p);
      if (cache != null) {
        cache.put(h.partName, underlying);
      }
    }

    long modTime = 0;
    int version = metadata.getVersion();
    if (version < 3) {
      modTime = underlying.getModificationTime();
    } else if (version == 3) {
      modTime = h.getModificationTime();
    }

    return new FileStatus(
        h.isDir()? 0L: h.getLength(),
        h.isDir(),
        underlying.getReplication(),
        underlying.getBlockSize(),
        modTime,
        underlying.getAccessTime(),
        underlying.getPermission(),
        underlying.getOwner(),
        underlying.getGroup(),
        makeRelative(this.uri.getPath(), new Path(h.name)));
  }

  // a single-line parser for hadoop archive statuses,
  // stored one entry per line in the index files;
  // the format is of the form
  // filename "dir"/"file" partFileName startIndex length
  // <space separated children>
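  // For example (hypothetical lines; file names are URL-encoded in
  // version 2/3 archives):
  //   /dir/data.txt file part-0 0 1024
  //   /dir dir none 0 0 data.txt other.txt
  // In version 3 an extra URL-encoded field carrying
  // "modificationTime permission owner group" is appended for files and
  // stored in the partName slot for directories (see below).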
  private class HarStatus {
    boolean isDir;
    String name;
    List<String> children;
    String partName;
    long startIndex;
    long length;
    long modificationTime = 0;

    public HarStatus(String harString) throws UnsupportedEncodingException {
      String[] splits = harString.split(" ");
      this.name = decodeFileName(splits[0]);
      this.isDir = "dir".equals(splits[1]);
      // this is equal to "none" if it is a directory
      this.partName = splits[2];
      this.startIndex = Long.parseLong(splits[3]);
      this.length = Long.parseLong(splits[4]);

      int version = metadata.getVersion();
      String[] propSplits = null;
      // propSplits is used to retrieve the metainformation that Har versions
      // 1 & 2 missed (modification time, permission, owner group).
      // These fields are stored in an encoded string placed in different
      // locations depending on whether it's a file or directory entry.
      // If it's a directory, the string will be placed at the partName
      // location (directories have no partName because they don't have data
      // to be stored). This is done because the number of fields in a
      // directory entry is unbounded (all children are listed at the end).
      // If it's a file, the string will be the last field.
      if (isDir) {
        if (version == 3){
          propSplits = decodeString(this.partName).split(" ");
        }
        children = new ArrayList<String>();
        for (int i = 5; i < splits.length; i++) {
          children.add(decodeFileName(splits[i]));
        }
      } else if (version == 3) {
        propSplits = decodeString(splits[5]).split(" ");
      }

      if (propSplits != null && propSplits.length >= 4) {
        modificationTime = Long.parseLong(propSplits[0]);
        // the fields below are stored in the file but are currently not used
        // by HarFileSystem
        // permission = new FsPermission(Short.parseShort(propSplits[1]));
        // owner = decodeString(propSplits[2]);
        // group = decodeString(propSplits[3]);
      }
    }
    public boolean isDir() {
      return isDir;
    }

    public String getName() {
      return name;
    }
    public String getPartName() {
      return partName;
    }
    public long getStartIndex() {
      return startIndex;
    }
    public long getLength() {
      return length;
    }
    public long getModificationTime() {
      return modificationTime;
    }
  }

  /**
   * Return the filestatus of files in the har archive.
   * The permissions returned are those of the archive
   * index files. The permissions are not persisted
   * while creating a hadoop archive.
   * @param f the path in har filesystem
   * @return filestatus.
   * @throws IOException
   */
  @Override
  public FileStatus getFileStatus(Path f) throws IOException {
    HarStatus hstatus = getFileHarStatus(f);
    return toFileStatus(hstatus, null);
  }

  private HarStatus getFileHarStatus(Path f) throws IOException {
    // look up the path in the index
    Path p = makeQualified(f);
    Path harPath = getPathInHar(p);
    if (harPath == null) {
      throw new IOException("Invalid file name: " + f + " in " + uri);
    }
    HarStatus hstatus = metadata.archive.get(harPath);
    if (hstatus == null) {
      throw new FileNotFoundException("File: " +  f + " does not exist in " + uri);
    }
    return hstatus;
  }

  /**
   * @return null since no checksum algorithm is implemented.
   */
  @Override
  public FileChecksum getFileChecksum(Path f, long length) {
    return null;
  }

  /**
   * Returns a har input stream which fakes end of
   * file. It reads the index files to get the part
   * file name and the size and start of the file.
   */
  @Override
  public FSDataInputStream open(Path f, int bufferSize) throws IOException {
    // get the fs DataInputStream for the underlying file
    HarStatus hstatus = getFileHarStatus(f);
    if (hstatus.isDir()) {
      throw new FileNotFoundException(f + " : not a file in " +
                archivePath);
    }
    return new HarFSDataInputStream(fs, new Path(archivePath,
        hstatus.getPartName()),
        hstatus.getStartIndex(), hstatus.getLength(), bufferSize);
  }

  /**
   * Used for delegation token related functionality. Must delegate to
   * underlying file system.
   */
  @Override
  public FileSystem[] getChildFileSystems() {
    return new FileSystem[]{fs};
  }

  @Override
  public FSDataOutputStream create(Path f, FsPermission permission,
      boolean overwrite, int bufferSize, short replication, long blockSize,
      Progressable progress) throws IOException {
    throw new IOException("Har: create not allowed.");
  }

  @SuppressWarnings("deprecation")
  @Override
  public FSDataOutputStream createNonRecursive(Path f, boolean overwrite,
      int bufferSize, short replication, long blockSize, Progressable progress)
      throws IOException {
    throw new IOException("Har: create not allowed.");
  }

  @Override
  public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException {
    throw new IOException("Har: append not allowed.");
  }

  @Override
  public void close() throws IOException {
    super.close();
    if (fs != null) {
      try {
        fs.close();
      } catch(IOException ie) {
        // this might already be closed
        // ignore
      }
    }
  }

  /**
   * Not implemented.
   */
  @Override
  public boolean setReplication(Path src, short replication) throws IOException{
    throw new IOException("Har: setReplication not allowed");
  }

  @Override
  public boolean rename(Path src, Path dst) throws IOException {
    throw new IOException("Har: rename not allowed");
  }

  @Override
  public FSDataOutputStream append(Path f) throws IOException {
    throw new IOException("Har: append not allowed");
  }

  /**
   * Not implemented.
   */
  @Override
  public boolean truncate(Path f, long newLength) throws IOException {
    throw new IOException("Har: truncate not allowed");
  }

  /**
   * Not implemented.
   */
  @Override
  public boolean delete(Path f, boolean recursive) throws IOException {
    throw new IOException("Har: delete not allowed");
  }

  /**
   * listStatus returns the children of a directory
   * after looking up the index files.
   */
  @Override
  public FileStatus[] listStatus(Path f) throws IOException {
    // need to see whether the path is a file or a directory in the index;
    // we will create fake filestatuses to return
    // to the client
    List<FileStatus> statuses = new ArrayList<FileStatus>();
    Path tmpPath = makeQualified(f);
    Path harPath = getPathInHar(tmpPath);
    HarStatus hstatus = metadata.archive.get(harPath);
    if (hstatus == null) {
      throw new FileNotFoundException("File " + f + " not found in " + archivePath);
    }
    if (hstatus.isDir()) {
      fileStatusesInIndex(hstatus, statuses);
    } else {
      statuses.add(toFileStatus(hstatus, null));
    }

    return statuses.toArray(new FileStatus[statuses.size()]);
  }

  /**
   * Return the top level archive path.
   */
  @Override
  public Path getHomeDirectory() {
    return new Path(uri.toString());
  }

  @Override
  public void setWorkingDirectory(Path newDir) {
    // does nothing.
  }

  /**
   * Not implemented.
   */
  @Override
  public boolean mkdirs(Path f, FsPermission permission) throws IOException {
    throw new IOException("Har: mkdirs not allowed");
  }

  /**
   * Not implemented.
   */
  @Override
  public void copyFromLocalFile(boolean delSrc, boolean overwrite,
      Path src, Path dst) throws IOException {
    throw new IOException("Har: copyfromlocalfile not allowed");
  }

  @Override
  public void copyFromLocalFile(boolean delSrc, boolean overwrite,
      Path[] srcs, Path dst) throws IOException {
    throw new IOException("Har: copyfromlocalfile not allowed");
  }

  /**
   * Copies the file in the har filesystem to a local file.
   */
  @Override
  public void copyToLocalFile(boolean delSrc, Path src, Path dst)
    throws IOException {
    FileUtil.copy(this, src, getLocal(getConf()), dst, false, getConf());
  }

  /**
   * Not implemented.
   */
  @Override
  public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
    throws IOException {
    throw new IOException("Har: startLocalOutput not allowed");
  }

  /**
   * Not implemented.
   */
  @Override
  public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile)
    throws IOException {
    throw new IOException("Har: completeLocalOutput not allowed");
  }

  /**
   * Not implemented.
   */
  @Override
  public void setOwner(Path p, String username, String groupname)
    throws IOException {
    throw new IOException("Har: setowner not allowed");
  }

  @Override
  public void setTimes(Path p, long mtime, long atime) throws IOException {
    throw new IOException("Har: setTimes not allowed");
  }

  /**
   * Not implemented.
   */
  @Override
  public void setPermission(Path p, FsPermission permission)
    throws IOException {
    throw new IOException("Har: setPermission not allowed");
  }

  /**
   * Hadoop archives input stream. This input stream fakes EOF
   * since archive files are part of bigger part files.
   */
  private static class HarFSDataInputStream extends FSDataInputStream {
    /**
     * Create an input stream that fakes all the reads/positions/seeking.
     */
    private static class HarFsInputStream extends FSInputStream
        implements CanSetDropBehind, CanSetReadahead {
      private long position, start, end;
      // The underlying data input stream that the
      // underlying filesystem will return.
      private final FSDataInputStream underLyingStream;
      // one byte buffer
      private final byte[] oneBytebuff = new byte[1];

      HarFsInputStream(FileSystem fs, Path path, long start,
          long length, int bufferSize) throws IOException {
        if (length < 0) {
          throw new IllegalArgumentException("Negative length ["+length+"]");
        }
        underLyingStream = fs.open(path, bufferSize);
        underLyingStream.seek(start);
        // the start of this file in the part file
        this.start = start;
        // the position pointer in the part file
        this.position = start;
        // the end pointer in the part file
        this.end = start + length;
      }

      @Override
      public synchronized int available() throws IOException {
        long remaining = end - underLyingStream.getPos();
        if (remaining > Integer.MAX_VALUE) {
          return Integer.MAX_VALUE;
        }
        return (int) remaining;
      }

      @Override
      public synchronized void close() throws IOException {
        underLyingStream.close();
        super.close();
      }

      // not implemented
      @Override
      public void mark(int readLimit) {
        // do nothing
      }

      /**
       * reset is not implemented
       */
      @Override
      public void reset() throws IOException {
        throw new IOException("reset not implemented.");
      }

      @Override
      public synchronized int read() throws IOException {
        int ret = read(oneBytebuff, 0, 1);
        return (ret <= 0) ? -1: (oneBytebuff[0] & 0xff);
      }

      // NB: currently this method is never actually executed because
      // java.io.DataInputStream.read(byte[]) directly delegates to
      // method java.io.InputStream.read(byte[], int, int).
      // However, potentially it can be invoked, so leave it intact for now.
      @Override
      public synchronized int read(byte[] b) throws IOException {
        final int ret = read(b, 0, b.length);
        return ret;
      }

      /**
       * Read up to len bytes, but never past the end of the contained
       * file within the part file.
       */
      @Override
      public synchronized int read(byte[] b, int offset, int len)
        throws IOException {
        int newlen = len;
        int ret = -1;
        if (position + len > end) {
          newlen = (int) (end - position);
        }
        // end case
        if (newlen == 0)
          return ret;
        ret = underLyingStream.read(b, offset, newlen);
        if (ret > 0) {
          // only advance the position when bytes were actually read
          position += ret;
        }
        return ret;
      }

      @Override
      public synchronized long skip(long n) throws IOException {
        long tmpN = n;
        if (tmpN > 0) {
          final long actualRemaining = end - position;
          if (tmpN > actualRemaining) {
            tmpN = actualRemaining;
          }
          underLyingStream.seek(tmpN + position);
          position += tmpN;
          return tmpN;
        }
        // NB: the contract is described in java.io.InputStream.skip(long):
        // this method returns the number of bytes actually skipped, so
        // the return value should never be negative.
        return 0;
      }

      @Override
      public synchronized long getPos() throws IOException {
        return (position - start);
      }

      @Override
      public synchronized void seek(final long pos) throws IOException {
        validatePosition(pos);
        position = start + pos;
        underLyingStream.seek(position);
      }

      private void validatePosition(final long pos) throws IOException {
        if (pos < 0) {
          throw new IOException("Negative position: "+pos);
        }
        final long length = end - start;
        if (pos > length) {
          throw new IOException("Position past the end " +
              "of the stream (length = "+length+"): " + pos);
        }
      }

      @Override
      public boolean seekToNewSource(long targetPos) throws IOException {
        // do not need to implement this
        // hdfs itself does seekToNewSource
        // while reading.
        return false;
      }

      /**
       * Implementing positioned read.
       */
      @Override
      public int read(long pos, byte[] b, int offset, int length)
      throws IOException {
        int nlength = length;
        if (start + nlength + pos > end) {
          // length corrected to the real remaining length:
          nlength = (int) (end - start - pos);
        }
        if (nlength <= 0) {
          // EOS:
          return -1;
        }
        return underLyingStream.read(pos + start, b, offset, nlength);
      }

      /**
       * Positioned read, fully.
       */
      @Override
      public void readFully(long pos, byte[] b, int offset, int length)
      throws IOException {
        if (start + length + pos > end) {
          throw new IOException("Not enough bytes to read.");
        }
        underLyingStream.readFully(pos + start, b, offset, length);
      }

      @Override
      public void readFully(long pos, byte[] b) throws IOException {
        readFully(pos, b, 0, b.length);
      }

      @Override
      public void setReadahead(Long readahead) throws IOException {
        underLyingStream.setReadahead(readahead);
      }

      @Override
      public void setDropBehind(Boolean dropBehind) throws IOException {
        underLyingStream.setDropBehind(dropBehind);
      }
    }

    /**
     * Constructor for the har input stream.
     * @param fs the underlying filesystem
     * @param p The path in the underlying filesystem
     * @param start the start position in the part file
     * @param length the length of valid data in the part file
     * @param bufsize the buffer size
     * @throws IOException
     */
    public HarFSDataInputStream(FileSystem fs, Path  p, long start,
        long length, int bufsize) throws IOException {
        super(new HarFsInputStream(fs, p, start, length, bufsize));
    }
  }

  private class HarMetaData {
    private FileSystem fs;
    private int version;
    // the masterIndex of the archive
    private Path masterIndexPath;
    // the index file
    private Path archiveIndexPath;

    private long masterIndexTimestamp;
    private long archiveIndexTimestamp;

    List<Store> stores = new ArrayList<Store>();
    Map<Path, HarStatus> archive = new HashMap<Path, HarStatus>();
    private Map<Path, FileStatus> partFileStatuses = new HashMap<Path, FileStatus>();

    public HarMetaData(FileSystem fs, Path masterIndexPath, Path archiveIndexPath) {
      this.fs = fs;
      this.masterIndexPath = masterIndexPath;
      this.archiveIndexPath = archiveIndexPath;
    }

    public FileStatus getPartFileStatus(Path partPath) throws IOException {
      FileStatus status;
      status = partFileStatuses.get(partPath);
      if (status == null) {
        status = fs.getFileStatus(partPath);
        partFileStatuses.put(partPath, status);
      }
      return status;
    }

    public long getMasterIndexTimestamp() {
      return masterIndexTimestamp;
    }

    public long getArchiveIndexTimestamp() {
      return archiveIndexTimestamp;
    }

    private int getVersion() {
      return version;
    }

    private void parseMetaData() throws IOException {
      Text line = new Text();
      long read;
      FSDataInputStream in = null;
      LineReader lin = null;

      try {
        in = fs.open(masterIndexPath);
        FileStatus masterStat = fs.getFileStatus(masterIndexPath);
        masterIndexTimestamp = masterStat.getModificationTime();
        lin = new LineReader(in, getConf());
        read = lin.readLine(line);

        // the first line contains the version of the index file
        String versionLine = line.toString();
        String[] arr = versionLine.split(" ");
        version = Integer.parseInt(arr[0]);
        // make it always backwards-compatible
        if (this.version > HarFileSystem.VERSION) {
          throw new IOException("Invalid version " +
              this.version + " expected " + HarFileSystem.VERSION);
        }

        // each subsequent line contains a hash code range and the
        // corresponding byte range in the _index file
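        // e.g. (hypothetical): a line "0 1756908746 0 432" would add
        // Store(0, 432); only fields 2 and 3 (the byte offsets into
        // _index) are used here.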
        String[] readStr;
        while(read < masterStat.getLen()) {
          int b = lin.readLine(line);
          read += b;
          readStr = line.toString().split(" ");
          stores.add(new Store(Long.parseLong(readStr[2]),
              Long.parseLong(readStr[3])));
          line.clear();
        }
      } catch (IOException ioe) {
        LOG.warn("Encountered exception ", ioe);
        throw ioe;
      } finally {
        IOUtils.cleanup(LOG, lin, in);
      }

      FSDataInputStream aIn = fs.open(archiveIndexPath);
      try {
        FileStatus archiveStat = fs.getFileStatus(archiveIndexPath);
        archiveIndexTimestamp = archiveStat.getModificationTime();
        LineReader aLin;

        // now start reading the real index file
        for (Store s: stores) {
          read = 0;
          aIn.seek(s.begin);
          aLin = new LineReader(aIn, getConf());
          while (read + s.begin < s.end) {
            int tmp = aLin.readLine(line);
            read += tmp;
            String lineFeed = line.toString();
            String[] parsed = lineFeed.split(" ");
            parsed[0] = decodeFileName(parsed[0]);
            archive.put(new Path(parsed[0]), new HarStatus(lineFeed));
            line.clear();
          }
        }
      } finally {
        IOUtils.cleanup(LOG, aIn);
      }
    }
  }

  /*
   * testing purposes only:
   */
  HarMetaData getMetadata() {
    return metadata;
  }

  private static class LruCache<K, V> extends LinkedHashMap<K, V> {
    private final int MAX_ENTRIES;

    public LruCache(int maxEntries) {
      super(maxEntries + 1, 1.0f, true);
      MAX_ENTRIES = maxEntries;
    }

    @Override
    protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
      return size() > MAX_ENTRIES;
    }
  }


  @SuppressWarnings("deprecation")
  @Override
  public FsServerDefaults getServerDefaults() throws IOException {
    return fs.getServerDefaults();
  }

  @Override
  public FsServerDefaults getServerDefaults(Path f) throws IOException {
    return fs.getServerDefaults(f);
  }

  @Override
  public long getUsed() throws IOException {
    return fs.getUsed();
  }

  @SuppressWarnings("deprecation")
  @Override
  public long getDefaultBlockSize() {
    return fs.getDefaultBlockSize();
  }

  @SuppressWarnings("deprecation")
  @Override
  public long getDefaultBlockSize(Path f) {
    return fs.getDefaultBlockSize(f);
  }

  @SuppressWarnings("deprecation")
  @Override
  public short getDefaultReplication() {
    return fs.getDefaultReplication();
  }

  @Override
  public short getDefaultReplication(Path f) {
    return fs.getDefaultReplication(f);
  }
}