001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.fs;
019    
020    import org.apache.commons.logging.Log;
021    import org.apache.commons.logging.LogFactory;
022    import org.apache.hadoop.conf.Configuration;
023    import org.apache.hadoop.fs.permission.FsPermission;
024    import org.apache.hadoop.io.IOUtils;
025    import org.apache.hadoop.io.Text;
026    import org.apache.hadoop.util.LineReader;
027    import org.apache.hadoop.util.Progressable;
028    
029    import java.io.FileNotFoundException;
030    import java.io.IOException;
031    import java.io.UnsupportedEncodingException;
032    import java.net.URI;
033    import java.net.URISyntaxException;
034    import java.net.URLDecoder;
035    import java.util.*;
036    
037    /**
038     * This is an implementation of the Hadoop Archive 
039     * Filesystem. This archive Filesystem has index files
040     * of the form _index* and has contents of the form
041     * part-*. The index files store the indexes of the 
042     * real files. The index files are of the form _masterindex
043     * and _index. The master index is a level of indirection 
044     * in to the index file to make the look ups faster. the index
045     * file is sorted with hash code of the paths that it contains 
046     * and the master index contains pointers to the positions in 
047     * index for ranges of hashcodes.
048     */
049    
050    public class HarFileSystem extends FileSystem {
051    
052      private static final Log LOG = LogFactory.getLog(HarFileSystem.class);
053    
054      public static final String METADATA_CACHE_ENTRIES_KEY = "fs.har.metadatacache.entries";
055      public static final int METADATA_CACHE_ENTRIES_DEFAULT = 10;
056    
057      public static final int VERSION = 3;
058    
059      private static Map<URI, HarMetaData> harMetaCache;
060    
061      // uri representation of this Har filesystem
062      private URI uri;
063      // the top level path of the archive
064      // in the underlying file system
065      private Path archivePath;
066      // the har auth
067      private String harAuth;
068    
069      // pointer into the static metadata cache
070      private HarMetaData metadata;
071    
072      private FileSystem fs;
073    
074      /**
075       * public construction of harfilesystem
076       */
077      public HarFileSystem() {
078        // Must call #initialize() method to set the underlying file system
079      }
080    
081      /**
082       * Return the protocol scheme for the FileSystem.
083       * <p/>
084       *
085       * @return <code>har</code>
086       */
087      @Override
088      public String getScheme() {
089        return "har";
090      }
091    
092      /**
093       * Constructor to create a HarFileSystem with an
094       * underlying filesystem.
095       * @param fs underlying file system
096       */
097      public HarFileSystem(FileSystem fs) {
098        this.fs = fs;
099        this.statistics = fs.statistics;
100      }
101     
102      private synchronized void initializeMetadataCache(Configuration conf) {
103        if (harMetaCache == null) {
104          int cacheSize = conf.getInt(METADATA_CACHE_ENTRIES_KEY, METADATA_CACHE_ENTRIES_DEFAULT);
105          harMetaCache = Collections.synchronizedMap(new LruCache<URI, HarMetaData>(cacheSize));
106        }
107      }
108     
109      /**
110       * Initialize a Har filesystem per har archive. The 
111       * archive home directory is the top level directory
112       * in the filesystem that contains the HAR archive.
113       * Be careful with this method, you do not want to go 
114       * on creating new Filesystem instances per call to 
115       * path.getFileSystem().
116       * the uri of Har is 
117       * har://underlyingfsscheme-host:port/archivepath.
118       * or 
119       * har:///archivepath. This assumes the underlying filesystem
120       * to be used in case not specified.
121       */
122      @Override
123      public void initialize(URI name, Configuration conf) throws IOException {
124        // initialize the metadata cache, if needed
125        initializeMetadataCache(conf);
126    
127        // decode the name
128        URI underLyingURI = decodeHarURI(name, conf);
129        // we got the right har Path- now check if this is 
130        // truly a har filesystem
131        Path harPath = archivePath(
132          new Path(name.getScheme(), name.getAuthority(), name.getPath()));
133        if (harPath == null) { 
134          throw new IOException("Invalid path for the Har Filesystem. " + 
135                               name.toString());
136        }
137        if (fs == null) {
138          fs = FileSystem.get(underLyingURI, conf);
139        }
140        uri = harPath.toUri();
141        archivePath = new Path(uri.getPath());
142        harAuth = getHarAuth(underLyingURI);
    // check that the underlying fs contains
    // the index files
145        Path masterIndexPath = new Path(archivePath, "_masterindex");
146        Path archiveIndexPath = new Path(archivePath, "_index");
147        if (!fs.exists(masterIndexPath) || !fs.exists(archiveIndexPath)) {
148          throw new IOException("Invalid path for the Har Filesystem. " +
149              "No index file in " + harPath);
150        }
151    
152        metadata = harMetaCache.get(uri);
153        if (metadata != null) {
154          FileStatus mStat = fs.getFileStatus(masterIndexPath);
155          FileStatus aStat = fs.getFileStatus(archiveIndexPath);
156          if (mStat.getModificationTime() != metadata.getMasterIndexTimestamp() ||
157              aStat.getModificationTime() != metadata.getArchiveIndexTimestamp()) {
158            // the archive has been overwritten since we last read it
159            // remove the entry from the meta data cache
160            metadata = null;
161            harMetaCache.remove(uri);
162          }
163        }
164        if (metadata == null) {
165          metadata = new HarMetaData(fs, masterIndexPath, archiveIndexPath);
166          metadata.parseMetaData();
167          harMetaCache.put(uri, metadata);
168        }
169      }
170    
171      @Override
172      public Configuration getConf() {
173        return fs.getConf();
174      }
175    
  // get the version of the filesystem from the masterindex file
  // the version is currently not useful since it's the first version
  // of archives
179      public int getHarVersion() throws IOException {
180        if (metadata != null) {
181          return metadata.getVersion();
182        }
183        else {
184          throw new IOException("Invalid meta data for the Har Filesystem");
185        }
186      }
187    
188      /*
189       * find the parent path that is the 
190       * archive path in the path. The last
191       * path segment that ends with .har is 
192       * the path that will be returned.
193       */
194      private Path archivePath(Path p) {
195        Path retPath = null;
196        Path tmp = p;
197        for (int i=0; i< p.depth(); i++) {
198          if (tmp.toString().endsWith(".har")) {
199            retPath = tmp;
200            break;
201          }
202          tmp = tmp.getParent();
203        }
204        return retPath;
205      }
206    
207      /**
208       * decode the raw URI to get the underlying URI
209       * @param rawURI raw Har URI
210       * @return filtered URI of the underlying fileSystem
211       */
212      private URI decodeHarURI(URI rawURI, Configuration conf) throws IOException {
213        String tmpAuth = rawURI.getAuthority();
    //no authority was given, so we are using the default
    //filesystem in the config: create an underlying uri
    //from it and return it
    if (tmpAuth == null) {
220          return FileSystem.getDefaultUri(conf);
221        }
222        String authority = rawURI.getAuthority();
223        if (authority == null) {
224          throw new IOException("URI: " + rawURI
225              + " is an invalid Har URI since authority==null."
226              + "  Expecting har://<scheme>-<host>/<path>.");
227        }
228     
229        int i = authority.indexOf('-');
230        if (i < 0) {
231          throw new IOException("URI: " + rawURI
232              + " is an invalid Har URI since '-' not found."
233              + "  Expecting har://<scheme>-<host>/<path>.");
234        }
235     
236        if (rawURI.getQuery() != null) {
237          // query component not allowed
238          throw new IOException("query component in Path not supported  " + rawURI);
239        }
240     
241        URI tmp;
242        try {
243          // convert <scheme>-<host> to <scheme>://<host>
244          URI baseUri = new URI(authority.replaceFirst("-", "://"));
245     
246          tmp = new URI(baseUri.getScheme(), baseUri.getAuthority(),
247                rawURI.getPath(), rawURI.getQuery(), rawURI.getFragment());
248        } catch (URISyntaxException e) {
249          throw new IOException("URI: " + rawURI
250              + " is an invalid Har URI. Expecting har://<scheme>-<host>/<path>.");
251        }
252        return tmp;
253      }
254    
255      private static String decodeString(String str)
256        throws UnsupportedEncodingException {
257        return URLDecoder.decode(str, "UTF-8");
258      }
259    
260      private String decodeFileName(String fname)
261        throws UnsupportedEncodingException {
262        int version = metadata.getVersion();
263        if (version == 2 || version == 3){
264          return decodeString(fname);
265        }
266        return fname;
267      }
268    
269      /**
270       * return the top level archive.
271       */
272      @Override
273      public Path getWorkingDirectory() {
274        return new Path(uri.toString());
275      }
276    
277      @Override
278      public Path getInitialWorkingDirectory() {
279        return getWorkingDirectory();
280      }
281    
282      @Override
283      public FsStatus getStatus(Path p) throws IOException {
284        return fs.getStatus(p);
285      }
286    
287      /**
288       * Create a har specific auth 
289       * har-underlyingfs:port
290       * @param underLyingUri the uri of underlying
291       * filesystem
292       * @return har specific auth
293       */
294      private String getHarAuth(URI underLyingUri) {
295        String auth = underLyingUri.getScheme() + "-";
296        if (underLyingUri.getHost() != null) {
297          if (underLyingUri.getUserInfo() != null) {
298            auth += underLyingUri.getUserInfo();
299            auth += "@";
300          }
301          auth += underLyingUri.getHost();
302          if (underLyingUri.getPort() != -1) {
303            auth += ":";
304            auth +=  underLyingUri.getPort();
305          }
306        }
307        else {
308          auth += ":";
309        }
310        return auth;
311      }
312    
313      /**
314       * Used for delegation token related functionality. Must delegate to
315       * underlying file system.
316       */
317      @Override
318      protected URI getCanonicalUri() {
319        return fs.getCanonicalUri();
320      }
321    
322      @Override
323      protected URI canonicalizeUri(URI uri) {
324        return fs.canonicalizeUri(uri);
325      }
326    
327      /**
328       * Returns the uri of this filesystem.
329       * The uri is of the form 
330       * har://underlyingfsschema-host:port/pathintheunderlyingfs
331       */
332      @Override
333      public URI getUri() {
334        return this.uri;
335      }
336      
337      @Override
338      protected void checkPath(Path path) {
339        fs.checkPath(path);
340      }
341    
342      @Override
343      public Path resolvePath(Path p) throws IOException {
344        return fs.resolvePath(p);
345      }
346    
347      /**
348       * this method returns the path 
349       * inside the har filesystem.
350       * this is relative path inside 
351       * the har filesystem.
352       * @param path the fully qualified path in the har filesystem.
353       * @return relative path in the filesystem.
354       */
355      private Path getPathInHar(Path path) {
356        Path harPath = new Path(path.toUri().getPath());
357        if (archivePath.compareTo(harPath) == 0)
358          return new Path(Path.SEPARATOR);
359        Path tmp = new Path(harPath.getName());
360        Path parent = harPath.getParent();
361        while (!(parent.compareTo(archivePath) == 0)) {
362          if (parent.toString().equals(Path.SEPARATOR)) {
363            tmp = null;
364            break;
365          }
366          tmp = new Path(parent.getName(), tmp);
367          parent = parent.getParent();
368        }
369        if (tmp != null) 
370          tmp = new Path(Path.SEPARATOR, tmp);
371        return tmp;
372      }
373      
  // Build the fully qualified path for the relative path p, i.e. strip
  // the leading / and prepend scheme://authority/initial. Parsing and
  // string manipulation are error prone, so just use the Path API to do it.
378      private Path makeRelative(String initial, Path p) {
379        String scheme = this.uri.getScheme();
380        String authority = this.uri.getAuthority();
381        Path root = new Path(Path.SEPARATOR);
382        if (root.compareTo(p) == 0)
383          return new Path(scheme, authority, initial);
384        Path retPath = new Path(p.getName());
385        Path parent = p.getParent();
386        for (int i=0; i < p.depth()-1; i++) {
387          retPath = new Path(parent.getName(), retPath);
388          parent = parent.getParent();
389        }
390        return new Path(new Path(scheme, authority, initial),
391          retPath.toString());
392      }
393      
  /* this makes a path qualified in the har filesystem
   * (non-Javadoc)
   * @see org.apache.hadoop.fs.FileSystem#makeQualified(
   * org.apache.hadoop.fs.Path)
   */
399      @Override
400      public Path makeQualified(Path path) {
401        // make sure that we just get the 
402        // path component 
403        Path fsPath = path;
404        if (!path.isAbsolute()) {
405          fsPath = new Path(archivePath, path);
406        }
407    
408        URI tmpURI = fsPath.toUri();
409        //change this to Har uri 
410        return new Path(uri.getScheme(), harAuth, tmpURI.getPath());
411      }
412    
413      /**
414       * Fix offset and length of block locations.
415       * Note that this method modifies the original array.
416       * @param locations block locations of har part file
417       * @param start the start of the desired range in the contained file
418       * @param len the length of the desired range
419       * @param fileOffsetInHar the offset of the desired file in the har part file
420       * @return block locations with fixed offset and length
421       */  
422      static BlockLocation[] fixBlockLocations(BlockLocation[] locations,
423                                              long start,
424                                              long len,
425                                              long fileOffsetInHar) {
426        // offset 1 past last byte of desired range
427        long end = start + len;
428    
429        for (BlockLocation location : locations) {
430          // offset of part block relative to beginning of desired file
431          // (may be negative if file starts in this part block)
432          long harBlockStart = location.getOffset() - fileOffsetInHar;
433          // offset 1 past last byte of har block relative to beginning of
434          // desired file
435          long harBlockEnd = harBlockStart + location.getLength();
436          
437          if (start > harBlockStart) {
438            // desired range starts after beginning of this har block
439            // fix offset to beginning of relevant range (relative to desired file)
440            location.setOffset(start);
441            // fix length to relevant portion of har block
442            location.setLength(location.getLength() - (start - harBlockStart));
443          } else {
444            // desired range includes beginning of this har block
445            location.setOffset(harBlockStart);
446          }
447          
448          if (harBlockEnd > end) {
449            // range ends before end of this har block
450            // fix length to remove irrelevant portion at the end
451            location.setLength(location.getLength() - (harBlockEnd - end));
452          }
453        }
454        
455        return locations;
456      }
457      
458      /**
459       * Get block locations from the underlying fs and fix their
460       * offsets and lengths.
461       * @param file the input file status to get block locations
462       * @param start the start of the desired range in the contained file
463       * @param len the length of the desired range
464       * @return block locations for this segment of file
465       * @throws IOException
466       */
467      @Override
468      public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
469                                                   long len) throws IOException {
470        HarStatus hstatus = getFileHarStatus(file.getPath());
471        Path partPath = new Path(archivePath, hstatus.getPartName());
472        FileStatus partStatus = metadata.getPartFileStatus(partPath);
473    
474        // get all part blocks that overlap with the desired file blocks
475        BlockLocation[] locations = 
476          fs.getFileBlockLocations(partStatus,
477                                   hstatus.getStartIndex() + start, len);
478    
479        return fixBlockLocations(locations, start, len, hstatus.getStartIndex());
480      }
481      
482      /**
483       * the hash of the path p inside  the filesystem
484       * @param p the path in the harfilesystem
485       * @return the hash code of the path.
486       */
487      public static int getHarHash(Path p) {
488        return (p.toString().hashCode() & 0x7fffffff);
489      }
490      
491      static class Store {
492        public Store() {
493          begin = end = startHash = endHash = 0;
494        }
495        public Store(long begin, long end, int startHash, int endHash) {
496          this.begin = begin;
497          this.end = end;
498          this.startHash = startHash;
499          this.endHash = endHash;
500        }
501        public long begin;
502        public long end;
503        public int startHash;
504        public int endHash;
505      }
506      
507      /**
508       * Get filestatuses of all the children of a given directory. This just reads
509       * through index file and reads line by line to get all statuses for children
510       * of a directory. Its a brute force way of getting all such filestatuses
511       * 
512       * @param parent
513       *          the parent path directory
514       * @param statuses
515       *          the list to add the children filestatuses to
516       */
517      private void fileStatusesInIndex(HarStatus parent, List<FileStatus> statuses)
518              throws IOException {
519        String parentString = parent.getName();
520        if (!parentString.endsWith(Path.SEPARATOR)){
521            parentString += Path.SEPARATOR;
522        }
523        Path harPath = new Path(parentString);
524        int harlen = harPath.depth();
525        final Map<String, FileStatus> cache = new TreeMap<String, FileStatus>();
526    
527        for (HarStatus hstatus : metadata.archive.values()) {
528          String child = hstatus.getName();
529          if ((child.startsWith(parentString))) {
530            Path thisPath = new Path(child);
531            if (thisPath.depth() == harlen + 1) {
532              statuses.add(toFileStatus(hstatus, cache));
533            }
534          }
535        }
536      }
537    
538      /**
539       * Combine the status stored in the index and the underlying status. 
540       * @param h status stored in the index
541       * @param cache caching the underlying file statuses
542       * @return the combined file status
543       * @throws IOException
544       */
545      private FileStatus toFileStatus(HarStatus h,
546          Map<String, FileStatus> cache) throws IOException {
547        FileStatus underlying = null;
548        if (cache != null) {
549          underlying = cache.get(h.partName);
550        }
551        if (underlying == null) {
552          final Path p = h.isDir? archivePath: new Path(archivePath, h.partName);
553          underlying = fs.getFileStatus(p);
554          if (cache != null) {
555            cache.put(h.partName, underlying);
556          }
557        }
558    
559        long modTime = 0;
560        int version = metadata.getVersion();
561        if (version < 3) {
562          modTime = underlying.getModificationTime();
563        } else if (version == 3) {
564          modTime = h.getModificationTime();
565        }
566    
567        return new FileStatus(
568            h.isDir()? 0L: h.getLength(),
569            h.isDir(),
570            underlying.getReplication(),
571            underlying.getBlockSize(),
572            modTime,
573            underlying.getAccessTime(),
574            underlying.getPermission(),
575            underlying.getOwner(),
576            underlying.getGroup(),
577            makeRelative(this.uri.getPath(), new Path(h.name)));
578      }
579    
  // A parser for a hadoop archive status stored as a single
  // line in the index files. The format of a line is
  // filename "dir"/"file" partFileName startIndex length
  // <space separated children>
585      private class HarStatus {
586        boolean isDir;
587        String name;
588        List<String> children;
589        String partName;
590        long startIndex;
591        long length;
592        long modificationTime = 0;
593    
594        public HarStatus(String harString) throws UnsupportedEncodingException {
595          String[] splits = harString.split(" ");
596          this.name = decodeFileName(splits[0]);
597          this.isDir = "dir".equals(splits[1]) ? true: false;
598          // this is equal to "none" if its a directory
599          this.partName = splits[2];
600          this.startIndex = Long.parseLong(splits[3]);
601          this.length = Long.parseLong(splits[4]);
602    
603          int version = metadata.getVersion();
604          String[] propSplits = null;
605          // propSplits is used to retrieve the metainformation that Har versions
606          // 1 & 2 missed (modification time, permission, owner group).
607          // These fields are stored in an encoded string placed in different
608          // locations depending on whether it's a file or directory entry.
609          // If it's a directory, the string will be placed at the partName
610          // location (directories have no partName because they don't have data
611          // to be stored). This is done because the number of fields in a
612          // directory entry is unbounded (all children are listed at the end)
613          // If it's a file, the string will be the last field.
614          if (isDir) {
615            if (version == 3){
616              propSplits = decodeString(this.partName).split(" ");
617            }
618            children = new ArrayList<String>();
619            for (int i = 5; i < splits.length; i++) {
620              children.add(decodeFileName(splits[i]));
621            }
622          } else if (version == 3) {
623            propSplits = decodeString(splits[5]).split(" ");
624          }
625    
626          if (propSplits != null && propSplits.length >= 4) {
627            modificationTime = Long.parseLong(propSplits[0]);
628            // the fields below are stored in the file but are currently not used
629            // by HarFileSystem
630            // permission = new FsPermission(Short.parseShort(propSplits[1]));
631            // owner = decodeString(propSplits[2]);
632            // group = decodeString(propSplits[3]);
633          }
634        }
635        public boolean isDir() {
636          return isDir;
637        }
638        
639        public String getName() {
640          return name;
641        }
642        public String getPartName() {
643          return partName;
644        }
645        public long getStartIndex() {
646          return startIndex;
647        }
648        public long getLength() {
649          return length;
650        }
651        public long getModificationTime() {
652          return modificationTime;
653        }
654      }
655      
656      /**
657       * return the filestatus of files in har archive.
658       * The permission returned are that of the archive
659       * index files. The permissions are not persisted 
660       * while creating a hadoop archive.
661       * @param f the path in har filesystem
662       * @return filestatus.
663       * @throws IOException
664       */
665      @Override
666      public FileStatus getFileStatus(Path f) throws IOException {
667        HarStatus hstatus = getFileHarStatus(f);
668        return toFileStatus(hstatus, null);
669      }
670    
671      private HarStatus getFileHarStatus(Path f) throws IOException {
672        // get the fs DataInputStream for the underlying file
673        // look up the index.
674        Path p = makeQualified(f);
675        Path harPath = getPathInHar(p);
676        if (harPath == null) {
677          throw new IOException("Invalid file name: " + f + " in " + uri);
678        }
679        HarStatus hstatus = metadata.archive.get(harPath);
680        if (hstatus == null) {
681          throw new FileNotFoundException("File: " +  f + " does not exist in " + uri);
682        }
683        return hstatus;
684      }
685    
686      /**
687       * @return null since no checksum algorithm is implemented.
688       */
689      @Override
690      public FileChecksum getFileChecksum(Path f) {
691        return null;
692      }
693    
694      /**
695       * Returns a har input stream which fakes end of 
696       * file. It reads the index files to get the part 
697       * file name and the size and start of the file.
698       */
699      @Override
700      public FSDataInputStream open(Path f, int bufferSize) throws IOException {
701        // get the fs DataInputStream for the underlying file
702        HarStatus hstatus = getFileHarStatus(f);
703        if (hstatus.isDir()) {
704          throw new FileNotFoundException(f + " : not a file in " +
705                    archivePath);
706        }
707        return new HarFSDataInputStream(fs, new Path(archivePath, 
708            hstatus.getPartName()),
709            hstatus.getStartIndex(), hstatus.getLength(), bufferSize);
710      }
711    
712      /**
713       * Used for delegation token related functionality. Must delegate to
714       * underlying file system.
715       */
716      @Override
717      public FileSystem[] getChildFileSystems() {
718        return new FileSystem[]{fs};
719      }
720    
721      @Override
722      public FSDataOutputStream create(Path f, FsPermission permission,
723          boolean overwrite, int bufferSize, short replication, long blockSize,
724          Progressable progress) throws IOException {
725        throw new IOException("Har: create not allowed.");
726      }
727    
728      @SuppressWarnings("deprecation")
729      @Override
730      public FSDataOutputStream createNonRecursive(Path f, boolean overwrite,
731          int bufferSize, short replication, long blockSize, Progressable progress)
732          throws IOException {
733        throw new IOException("Har: create not allowed.");
734      }
735    
736      @Override
737      public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException {
738        throw new IOException("Har: append not allowed.");
739      }
740    
741      @Override
742      public void close() throws IOException {
743        super.close();
744        if (fs != null) {
745          try {
746            fs.close();
747          } catch(IOException ie) {
748            //this might already be closed
749            // ignore
750          }
751        }
752      }
753      
754      /**
755       * Not implemented.
756       */
757      @Override
758      public boolean setReplication(Path src, short replication) throws IOException{
759        throw new IOException("Har: setReplication not allowed");
760      }
761    
762      @Override
763      public boolean rename(Path src, Path dst) throws IOException {
764        throw new IOException("Har: rename not allowed");
765      }
766    
767      @Override
768      public FSDataOutputStream append(Path f) throws IOException {
769        throw new IOException("Har: append not allowed");
770      }
771    
772      /**
773       * Not implemented.
774       */
775      @Override
776      public boolean delete(Path f, boolean recursive) throws IOException { 
777        throw new IOException("Har: delete not allowed");
778      }
779    
780      /**
781       * liststatus returns the children of a directory 
782       * after looking up the index files.
783       */
784      @Override
785      public FileStatus[] listStatus(Path f) throws IOException {
    // look the path up in the archive index; for a directory
    // we will create synthetic filestatuses for its children
    // to return to the client
790        List<FileStatus> statuses = new ArrayList<FileStatus>();
791        Path tmpPath = makeQualified(f);
792        Path harPath = getPathInHar(tmpPath);
793        HarStatus hstatus = metadata.archive.get(harPath);
794        if (hstatus == null) {
795          throw new FileNotFoundException("File " + f + " not found in " + archivePath);
796        }
797        if (hstatus.isDir()) {
798          fileStatusesInIndex(hstatus, statuses);
799        } else {
800          statuses.add(toFileStatus(hstatus, null));
801        }
802        
803        return statuses.toArray(new FileStatus[statuses.size()]);
804      }
805      
806      /**
807       * return the top level archive path.
808       */
809      @Override
810      public Path getHomeDirectory() {
811        return new Path(uri.toString());
812      }
813    
814      @Override
815      public void setWorkingDirectory(Path newDir) {
816        //does nothing.
817      }
818      
819      /**
820       * not implemented.
821       */
822      @Override
823      public boolean mkdirs(Path f, FsPermission permission) throws IOException {
824        throw new IOException("Har: mkdirs not allowed");
825      }
826      
827      /**
828       * not implemented.
829       */
830      @Override
831      public void copyFromLocalFile(boolean delSrc, boolean overwrite,
832          Path src, Path dst) throws IOException {
833        throw new IOException("Har: copyfromlocalfile not allowed");
834      }
835    
836      @Override
837      public void copyFromLocalFile(boolean delSrc, boolean overwrite,
838          Path[] srcs, Path dst) throws IOException {
839        throw new IOException("Har: copyfromlocalfile not allowed");
840      }
841    
842      /**
843       * copies the file in the har filesystem to a local file.
844       */
845      @Override
846      public void copyToLocalFile(boolean delSrc, Path src, Path dst) 
847        throws IOException {
848        FileUtil.copy(this, src, getLocal(getConf()), dst, false, getConf());
849      }
850      
851      /**
852       * not implemented.
853       */
854      @Override
855      public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) 
856        throws IOException {
857        throw new IOException("Har: startLocalOutput not allowed");
858      }
859      
860      /**
861       * not implemented.
862       */
863      @Override
864      public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) 
865        throws IOException {
866        throw new IOException("Har: completeLocalOutput not allowed");
867      }
868      
869      /**
870       * not implemented.
871       */
872      @Override
873      public void setOwner(Path p, String username, String groupname)
874        throws IOException {
875        throw new IOException("Har: setowner not allowed");
876      }
877    
878      @Override
879      public void setTimes(Path p, long mtime, long atime) throws IOException {
880        throw new IOException("Har: setTimes not allowed");
881      }
882    
883      /**
884       * Not implemented.
885       */
886      @Override
887      public void setPermission(Path p, FsPermission permission)
888        throws IOException {
889        throw new IOException("Har: setPermission not allowed");
890      }
891      
892      /**
893       * Hadoop archives input stream. This input stream fakes EOF 
894       * since archive files are part of bigger part files.
895       */
896      private static class HarFSDataInputStream extends FSDataInputStream {
897        /**
898         * Create an input stream that fakes all the reads/positions/seeking.
899         */
900        private static class HarFsInputStream extends FSInputStream
901            implements CanSetDropBehind, CanSetReadahead {
902          private long position, start, end;
903          //The underlying data input stream that the
904          // underlying filesystem will return.
905          private final FSDataInputStream underLyingStream;
906          //one byte buffer
907          private final byte[] oneBytebuff = new byte[1];
908          
909          HarFsInputStream(FileSystem fs, Path path, long start,
910              long length, int bufferSize) throws IOException {
911            if (length < 0) {
912              throw new IllegalArgumentException("Negative length ["+length+"]");
913            }
914            underLyingStream = fs.open(path, bufferSize);
915            underLyingStream.seek(start);
916            // the start of this file in the part file
917            this.start = start;
918            // the position pointer in the part file
919            this.position = start;
920            // the end pointer in the part file
921            this.end = start + length;
922          }
923          
924          @Override
925          public synchronized int available() throws IOException {
926            long remaining = end - underLyingStream.getPos();
927            if (remaining > Integer.MAX_VALUE) {
928              return Integer.MAX_VALUE;
929            }
930            return (int) remaining;
931          }
932          
933          @Override
934          public synchronized  void close() throws IOException {
935            underLyingStream.close();
936            super.close();
937          }
938          
939          //not implemented
940          @Override
941          public void mark(int readLimit) {
942            // do nothing 
943          }
944          
945          /**
946           * reset is not implemented
947           */
948          @Override
949          public void reset() throws IOException {
950            throw new IOException("reset not implemented.");
951          }
952          
953          @Override
954          public synchronized int read() throws IOException {
955            int ret = read(oneBytebuff, 0, 1);
956            return (ret <= 0) ? -1: (oneBytebuff[0] & 0xff);
957          }
958          
      // NB: currently this method is actually never executed because
      // java.io.DataInputStream.read(byte[]) directly delegates to
      // java.io.InputStream.read(byte[], int, int).
      // However, it can potentially be invoked, so leave it intact for now.
963          @Override
964          public synchronized int read(byte[] b) throws IOException {
965            final int ret = read(b, 0, b.length);
966            return ret;
967          }
968          
969          /**
970           * 
971           */
972          @Override
973          public synchronized int read(byte[] b, int offset, int len) 
974            throws IOException {
975            int newlen = len;
976            int ret = -1;
977            if (position + len > end) {
978              newlen = (int) (end - position);
979            }
980            // end case
981            if (newlen == 0)
982              return ret;
        ret = underLyingStream.read(b, offset, newlen);
        // a read at EOF returns -1; only advance the position by bytes
        // that were actually read
        if (ret > 0) {
          position += ret;
        }
985            return ret;
986          }
987          
988          @Override
989          public synchronized long skip(long n) throws IOException {
990            long tmpN = n;
991            if (tmpN > 0) {
992              final long actualRemaining = end - position; 
993              if (tmpN > actualRemaining) {
994                tmpN = actualRemaining;
995              }   
996              underLyingStream.seek(tmpN + position);
997              position += tmpN;
998              return tmpN;
999            }   
1000            // NB: the contract is described in java.io.InputStream.skip(long):
1001            // this method returns the number of bytes actually skipped, so,
1002            // the return value should never be negative. 
1003            return 0;
1004          }   
1005          
1006          @Override
1007          public synchronized long getPos() throws IOException {
1008            return (position - start);
1009          }
1010          
1011          @Override
1012          public synchronized void seek(final long pos) throws IOException {
1013            validatePosition(pos);
1014            position = start + pos;
1015            underLyingStream.seek(position);
1016          }
1017    
1018          private void validatePosition(final long pos) throws IOException {
1019            if (pos < 0) {
1020              throw new IOException("Negative position: "+pos);
1021             }
1022             final long length = end - start;
1023             if (pos > length) {
1024               throw new IOException("Position behind the end " +
1025                   "of the stream (length = "+length+"): " + pos);
1026             }
1027          }
1028    
1029          @Override
1030          public boolean seekToNewSource(long targetPos) throws IOException {
        // do not need to implement this;
        // hdfs itself does seekToNewSource
        // while reading.
1034            return false;
1035          }
1036          
1037          /**
1038           * implementing position readable. 
1039           */
1040          @Override
1041          public int read(long pos, byte[] b, int offset, int length) 
1042          throws IOException {
1043            int nlength = length;
1044            if (start + nlength + pos > end) {
1045              // length corrected to the real remaining length:
1046              nlength = (int) (end - start - pos);
1047            }
1048            if (nlength <= 0) {
1049              // EOS:
1050              return -1;
1051            }
1052            return underLyingStream.read(pos + start , b, offset, nlength);
1053          }
1054          
1055          /**
1056           * position readable again.
1057           */
1058          @Override
1059          public void readFully(long pos, byte[] b, int offset, int length) 
1060          throws IOException {
1061            if (start + length + pos > end) {
1062              throw new IOException("Not enough bytes to read.");
1063            }
1064            underLyingStream.readFully(pos + start, b, offset, length);
1065          }
1066          
1067          @Override
1068          public void readFully(long pos, byte[] b) throws IOException {
1069              readFully(pos, b, 0, b.length);
1070          }
1071    
1072          @Override
1073          public void setReadahead(Long readahead) throws IOException {
1074            underLyingStream.setReadahead(readahead);
1075          }
1076    
1077          @Override
1078          public void setDropBehind(Boolean dropBehind) throws IOException {
1079            underLyingStream.setDropBehind(dropBehind);
1080          }
1081        }
1082      
1083        /**
     * Constructor for the har input stream.
1085         * @param fs the underlying filesystem
1086         * @param p The path in the underlying filesystem
1087         * @param start the start position in the part file
1088         * @param length the length of valid data in the part file
1089         * @param bufsize the buffer size
1090         * @throws IOException
1091         */
1092        public HarFSDataInputStream(FileSystem fs, Path  p, long start, 
1093            long length, int bufsize) throws IOException {
1094            super(new HarFsInputStream(fs, p, start, length, bufsize));
1095        }
1096      }
1097    
1098      private class HarMetaData {
1099        private FileSystem fs;
1100        private int version;
1101        // the masterIndex of the archive
1102        private Path masterIndexPath;
1103        // the index file 
1104        private Path archiveIndexPath;
1105    
1106        private long masterIndexTimestamp;
1107        private long archiveIndexTimestamp;
1108    
1109        List<Store> stores = new ArrayList<Store>();
1110        Map<Path, HarStatus> archive = new HashMap<Path, HarStatus>();
1111        private Map<Path, FileStatus> partFileStatuses = new HashMap<Path, FileStatus>();
1112    
1113        public HarMetaData(FileSystem fs, Path masterIndexPath, Path archiveIndexPath) {
1114          this.fs = fs;
1115          this.masterIndexPath = masterIndexPath;
1116          this.archiveIndexPath = archiveIndexPath;
1117        }
1118    
1119        public FileStatus getPartFileStatus(Path partPath) throws IOException {
1120          FileStatus status;
1121          status = partFileStatuses.get(partPath);
1122          if (status == null) {
1123            status = fs.getFileStatus(partPath);
1124            partFileStatuses.put(partPath, status);
1125          }
1126          return status;
1127        }
1128    
1129        public long getMasterIndexTimestamp() {
1130          return masterIndexTimestamp;
1131        }
1132    
1133        public long getArchiveIndexTimestamp() {
1134          return archiveIndexTimestamp;
1135        }
1136    
1137        private int getVersion() {
1138          return version;
1139        }
1140    
1141        private void parseMetaData() throws IOException {
1142          Text line = new Text();
1143          long read;
1144          FSDataInputStream in = null;
1145          LineReader lin = null;
1146    
1147          try {
1148            in = fs.open(masterIndexPath);
1149            FileStatus masterStat = fs.getFileStatus(masterIndexPath);
1150            masterIndexTimestamp = masterStat.getModificationTime();
1151            lin = new LineReader(in, getConf());
1152            read = lin.readLine(line);
1153    
1154            // the first line contains the version of the index file
1155            String versionLine = line.toString();
1156            String[] arr = versionLine.split(" ");
1157            version = Integer.parseInt(arr[0]);
        // archives with a newer version than this filesystem supports
        // are rejected; older versions remain readable
1159            if (this.version > HarFileSystem.VERSION) {
1160              throw new IOException("Invalid version " + 
1161                  this.version + " expected " + HarFileSystem.VERSION);
1162            }
1163    
        // each remaining line contains a hashcode range and the byte
        // range of the corresponding section of the index file
1165            String[] readStr;
1166            while(read < masterStat.getLen()) {
1167              int b = lin.readLine(line);
1168              read += b;
1169              readStr = line.toString().split(" ");
1170              int startHash = Integer.parseInt(readStr[0]);
1171              int endHash  = Integer.parseInt(readStr[1]);
1172              stores.add(new Store(Long.parseLong(readStr[2]), 
1173                  Long.parseLong(readStr[3]), startHash,
1174                  endHash));
1175              line.clear();
1176            }
1177          } catch (IOException ioe) {
1178            LOG.warn("Encountered exception ", ioe);
1179            throw ioe;
1180          } finally {
1181            IOUtils.cleanup(LOG, lin, in);
1182          }
1183    
1184          FSDataInputStream aIn = fs.open(archiveIndexPath);
1185          try {
1186            FileStatus archiveStat = fs.getFileStatus(archiveIndexPath);
1187            archiveIndexTimestamp = archiveStat.getModificationTime();
1188            LineReader aLin;
1189    
1190            // now start reading the real index file
1191            for (Store s: stores) {
1192              read = 0;
1193              aIn.seek(s.begin);
1194              aLin = new LineReader(aIn, getConf());
1195              while (read + s.begin < s.end) {
1196                int tmp = aLin.readLine(line);
1197                read += tmp;
1198                String lineFeed = line.toString();
1199                String[] parsed = lineFeed.split(" ");
1200                parsed[0] = decodeFileName(parsed[0]);
1201                archive.put(new Path(parsed[0]), new HarStatus(lineFeed));
1202                line.clear();
1203              }
1204            }
1205          } finally {
1206            IOUtils.cleanup(LOG, aIn);
1207          }
1208        }
1209      }
1210      
1211      /*
1212       * testing purposes only:
1213       */
1214      HarMetaData getMetadata() {
1215        return metadata;
1216      }
1217    
1218      private static class LruCache<K, V> extends LinkedHashMap<K, V> {
1219        private final int MAX_ENTRIES;
1220    
1221        public LruCache(int maxEntries) {
1222            super(maxEntries + 1, 1.0f, true);
1223            MAX_ENTRIES = maxEntries;
1224        }
1225    
1226        @Override
1227        protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
1228            return size() > MAX_ENTRIES;
1229        }
1230      }
1231    
1232      @SuppressWarnings("deprecation")
1233      @Override
1234      public FsServerDefaults getServerDefaults() throws IOException {
1235        return fs.getServerDefaults();
1236      }
1237    
1238      @Override
1239      public FsServerDefaults getServerDefaults(Path f) throws IOException {
1240        return fs.getServerDefaults(f);
1241      }
1242    
1243      @Override
1244      public long getUsed() throws IOException{
1245        return fs.getUsed();
1246      }
1247    
1248      @SuppressWarnings("deprecation")
1249      @Override
1250      public long getDefaultBlockSize() {
1251        return fs.getDefaultBlockSize();
1252      }
1253    
1254      @SuppressWarnings("deprecation")
1255      @Override
1256      public long getDefaultBlockSize(Path f) {
1257        return fs.getDefaultBlockSize(f);
1258      }
1259    
1260      @SuppressWarnings("deprecation")
1261      @Override
1262      public short getDefaultReplication() {
1263        return fs.getDefaultReplication();
1264      }
1265    
1266      @Override
1267      public short getDefaultReplication(Path f) {
1268        return fs.getDefaultReplication(f);
1269      }
1270    }