001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one
003     * or more contributor license agreements.  See the NOTICE file
004     * distributed with this work for additional information
005     * regarding copyright ownership.  The ASF licenses this file
006     * to you under the Apache License, Version 2.0 (the
007     * "License"); you may not use this file except in compliance
008     * with the License.  You may obtain a copy of the License at
009     *
010     *     http://www.apache.org/licenses/LICENSE-2.0
011     *
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS,
014     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015     * See the License for the specific language governing permissions and
016     * limitations under the License.
017     */
018    package org.apache.hadoop.fs;
019    
020    import java.io.FileNotFoundException;
021    import java.io.IOException;
022    import java.io.UnsupportedEncodingException;
023    import java.net.URI;
024    import java.net.URISyntaxException;
025    import java.net.URLDecoder;
026    import java.util.ArrayList;
027    import java.util.List;
028    import java.util.Map;
029    import java.util.TreeMap;
030    import java.util.HashMap;
031    import java.util.concurrent.ConcurrentHashMap;
032    
033    import org.apache.hadoop.conf.Configuration;
034    import org.apache.hadoop.fs.permission.FsPermission;
035    import org.apache.hadoop.io.Text;
036    import org.apache.hadoop.util.LineReader;
037    import org.apache.hadoop.util.Progressable;
038    
/**
 * This is an implementation of the Hadoop Archive
 * FileSystem. A Hadoop archive exposes index files of
 * the form _index* and stores its contents in files of
 * the form part-*. The index files record where each
 * archived file lives inside the part files. There are
 * two index files, _masterindex and _index; the master
 * index is a level of indirection into the index file
 * that makes lookups faster. The index file is sorted
 * by the hash code of the paths it contains, and the
 * master index holds pointers to the positions in the
 * index for ranges of hash codes.
 */
051    
052    public class HarFileSystem extends FilterFileSystem {
053      public static final int VERSION = 3;
054    
055      private static final Map<URI, HarMetaData> harMetaCache =
056          new ConcurrentHashMap<URI, HarMetaData>();
057    
058      // uri representation of this Har filesystem
059      private URI uri;
060      // the top level path of the archive
061      // in the underlying file system
062      private Path archivePath;
063      // the har auth
064      private String harAuth;
065    
066      // pointer into the static metadata cache
067      private HarMetaData metadata;
068    
  /**
   * Public no-arg constructor of HarFileSystem; FileSystem
   * implementations are instantiated via reflection and then
   * initialized through {@link #initialize(URI, Configuration)}.
   */
073      public HarFileSystem() {
074      }
075      
  /**
   * Constructor to create a HarFileSystem with an
   * underlying filesystem.
   * @param fs the underlying filesystem
   */
081      public HarFileSystem(FileSystem fs) {
082        super(fs);
083      }
084      
  /**
   * Initialize a Har filesystem per har archive. The
   * archive home directory is the top level directory
   * in the underlying filesystem that contains the HAR archive.
   * Be careful with this method: you do not want to go
   * on creating new FileSystem instances per call to
   * path.getFileSystem().
   * The URI of a Har filesystem is either
   * har://underlyingfsscheme-host:port/archivepath
   * or
   * har:///archivepath, which falls back to the default
   * filesystem configured for the cluster.
   */
098      public void initialize(URI name, Configuration conf) throws IOException {
    // decode the name to get the URI of the underlying filesystem
    URI underLyingURI = decodeHarURI(name, conf);
    // we got the right har path; now check whether this
    // really is a har filesystem
103        Path harPath = archivePath(
104          new Path(name.getScheme(), name.getAuthority(), name.getPath()));
105        if (harPath == null) { 
106          throw new IOException("Invalid path for the Har Filesystem. " + 
107                               name.toString());
108        }
109        if (fs == null) {
110          fs = FileSystem.get(underLyingURI, conf);
111        }
112        uri = harPath.toUri();
113        archivePath = new Path(uri.getPath());
114        harAuth = getHarAuth(underLyingURI);
    // check that the underlying fs contains
    // the index files
117        Path masterIndexPath = new Path(archivePath, "_masterindex");
118        Path archiveIndexPath = new Path(archivePath, "_index");
119        if (!fs.exists(masterIndexPath) || !fs.exists(archiveIndexPath)) {
120          throw new IOException("Invalid path for the Har Filesystem. " +
121              "No index file in " + harPath);
122        }
123    
124        metadata = harMetaCache.get(uri);
125        if (metadata != null) {
126          FileStatus mStat = fs.getFileStatus(masterIndexPath);
127          FileStatus aStat = fs.getFileStatus(archiveIndexPath);
128          if (mStat.getModificationTime() != metadata.getMasterIndexTimestamp() ||
129              aStat.getModificationTime() != metadata.getArchiveIndexTimestamp()) {
130            // the archive has been overwritten since we last read it
131            // remove the entry from the meta data cache
132            metadata = null;
133            harMetaCache.remove(uri);
134          }
135        }
136        if (metadata == null) {
137          metadata = new HarMetaData(fs, masterIndexPath, archiveIndexPath);
138          metadata.parseMetaData();
139          harMetaCache.put(uri, metadata);
140        }
141      }
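
  // Illustrative use (host and paths hypothetical): clients normally reach
  // this class through a har:// path rather than constructing it directly.
  //
  //   Configuration conf = new Configuration();
  //   Path p = new Path("har://hdfs-namenode:8020/user/alice/logs.har/2011/app.log");
  //   FileSystem harFs = p.getFileSystem(conf);  // initialize() runs here
  //   FSDataInputStream in = harFs.open(p);      // reads from the part-* file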
142    
  // get the version of the filesystem from the masterindex file
  // the version is currently not useful since it is the first version
  // of archives
146      public int getHarVersion() throws IOException {
147        if (metadata != null) {
148          return metadata.getVersion();
149        }
150        else {
151          throw new IOException("Invalid meta data for the Har Filesystem");
152        }
153      }
154    
  /*
   * Find the ancestor of p that is the archive root:
   * the deepest path prefix whose last segment ends
   * with .har is returned, or null if p does not lie
   * inside a .har archive.
   */
161      private Path archivePath(Path p) {
162        Path retPath = null;
163        Path tmp = p;
164        for (int i=0; i< p.depth(); i++) {
165          if (tmp.toString().endsWith(".har")) {
166            retPath = tmp;
167            break;
168          }
169          tmp = tmp.getParent();
170        }
171        return retPath;
172      }
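
  // For example (path hypothetical), archivePath() applied to
  // "/user/alice/logs.har/2011/app.log" returns "/user/alice/logs.har",
  // while a path containing no ".har" segment yields null.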
173    
  /**
   * Decode the raw URI to get the URI of the underlying filesystem.
   * @param rawURI raw Har URI
   * @param conf the configuration used to resolve the default filesystem
   * @return filtered URI of the underlying fileSystem
   * @throws IOException if the URI is not a valid Har URI
   */
179      private URI decodeHarURI(URI rawURI, Configuration conf) throws IOException {
180        String tmpAuth = rawURI.getAuthority();
    // no authority given, so fall back to the default
    // filesystem configured in conf and return its URI
    if (tmpAuth == null) {
      return FileSystem.getDefaultUri(conf);
188        }
189        String host = rawURI.getHost();
190        if (host == null) {
191          throw new IOException("URI: " + rawURI
192              + " is an invalid Har URI since host==null."
193              + "  Expecting har://<scheme>-<host>/<path>.");
194        }
195        int i = host.indexOf('-');
196        if (i < 0) {
197          throw new IOException("URI: " + rawURI
198              + " is an invalid Har URI since '-' not found."
199              + "  Expecting har://<scheme>-<host>/<path>.");
200        }
201        final String underLyingScheme = host.substring(0, i);
202        i++;
203        final String underLyingHost = i == host.length()? null: host.substring(i);
204        int underLyingPort = rawURI.getPort();
205        String auth = (underLyingHost == null && underLyingPort == -1)?
206                      null:(underLyingHost+":"+underLyingPort);
    URI tmp = null;
    if (rawURI.getQuery() != null) {
      // query component not allowed
      throw new IOException("query component in Path not supported  " + rawURI);
    }
    try {
      tmp = new URI(underLyingScheme, auth, rawURI.getPath(),
            rawURI.getQuery(), rawURI.getFragment());
    } catch (URISyntaxException e) {
      // returning null here would only fail later with an obscure NPE,
      // so surface the problem as an IOException instead
      throw new IOException("URI: " + rawURI + " is an invalid Har URI.");
    }
    return tmp;
219      }
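
  // For instance (host hypothetical), "har://hdfs-namenode:8020/user/alice/logs.har"
  // decodes to the underlying URI "hdfs://namenode:8020/user/alice/logs.har", while
  // "har:///user/alice/logs.har" resolves against the configured default filesystem.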
220    
221      private static String decodeString(String str)
222        throws UnsupportedEncodingException {
223        return URLDecoder.decode(str, "UTF-8");
224      }
225    
226      private String decodeFileName(String fname) 
227        throws UnsupportedEncodingException {
228        int version = metadata.getVersion();
229        if (version == 2 || version == 3){
230          return decodeString(fname);
231        }
232        return fname;
233      }
234    
  /**
   * Return the top level archive path as the working directory.
   */
238      public Path getWorkingDirectory() {
239        return new Path(uri.toString());
240      }
241      
  /**
   * Create a har-specific authority of the form
   * underlyingscheme-host:port.
   * @param underLyingUri the URI of the underlying
   * filesystem
   * @return har specific auth
   */
249      private String getHarAuth(URI underLyingUri) {
250        String auth = underLyingUri.getScheme() + "-";
251        if (underLyingUri.getHost() != null) {
252          auth += underLyingUri.getHost() + ":";
253          if (underLyingUri.getPort() != -1) {
254            auth +=  underLyingUri.getPort();
255          }
256        }
257        else {
258          auth += ":";
259        }
260        return auth;
261      }
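
  // e.g. an underlying URI of hdfs://namenode:8020 yields the auth
  // "hdfs-namenode:8020", while a host-less URI such as file:/// yields "file-:".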
262      
  /**
   * Returns the URI of this filesystem.
   * The URI is of the form
   * har://underlyingfsscheme-host:port/pathintheunderlyingfs
   */
268      @Override
269      public URI getUri() {
270        return this.uri;
271      }
272      
  /**
   * Returns the path inside the har filesystem, i.e. the
   * path relative to the archive root (or null if the path
   * does not lie inside the archive).
   * @param path the fully qualified path in the har filesystem.
   * @return relative path in the har filesystem.
   */
281      private Path getPathInHar(Path path) {
282        Path harPath = new Path(path.toUri().getPath());
283        if (archivePath.compareTo(harPath) == 0)
284          return new Path(Path.SEPARATOR);
285        Path tmp = new Path(harPath.getName());
286        Path parent = harPath.getParent();
287        while (!(parent.compareTo(archivePath) == 0)) {
288          if (parent.toString().equals(Path.SEPARATOR)) {
289            tmp = null;
290            break;
291          }
292          tmp = new Path(parent.getName(), tmp);
293          parent = parent.getParent();
294        }
295        if (tmp != null) 
296          tmp = new Path(Path.SEPARATOR, tmp);
297        return tmp;
298      }
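
  // For instance (paths hypothetical), with archivePath "/user/alice/logs.har",
  // the qualified path ".../logs.har/2011/app.log" maps to "/2011/app.log"
  // inside the archive, and the archive root itself maps to "/".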
299      
  // Rebase the root-relative path p onto the prefix "initial",
  // producing a fully qualified path in this filesystem. Parsing and
  // string manipulation are error prone, so just use the Path API.
304      private Path makeRelative(String initial, Path p) {
305        String scheme = this.uri.getScheme();
306        String authority = this.uri.getAuthority();
307        Path root = new Path(Path.SEPARATOR);
308        if (root.compareTo(p) == 0)
309          return new Path(scheme, authority, initial);
310        Path retPath = new Path(p.getName());
311        Path parent = p.getParent();
312        for (int i=0; i < p.depth()-1; i++) {
313          retPath = new Path(parent.getName(), retPath);
314          parent = parent.getParent();
315        }
316        return new Path(new Path(scheme, authority, initial),
317          retPath.toString());
318      }
319      
320      /* this makes a path qualified in the har filesystem
321       * (non-Javadoc)
322       * @see org.apache.hadoop.fs.FilterFileSystem#makeQualified(
323       * org.apache.hadoop.fs.Path)
324       */
325      @Override
326      public Path makeQualified(Path path) {
327        // make sure that we just get the 
328        // path component 
329        Path fsPath = path;
330        if (!path.isAbsolute()) {
331          fsPath = new Path(archivePath, path);
332        }
333    
334        URI tmpURI = fsPath.toUri();
335        //change this to Har uri 
336        return new Path(uri.getScheme(), harAuth, tmpURI.getPath());
337      }
338    
339      /**
340       * Fix offset and length of block locations.
341       * Note that this method modifies the original array.
342       * @param locations block locations of har part file
343       * @param start the start of the desired range in the contained file
344       * @param len the length of the desired range
345       * @param fileOffsetInHar the offset of the desired file in the har part file
346       * @return block locations with fixed offset and length
347       */  
348      static BlockLocation[] fixBlockLocations(BlockLocation[] locations,
349                                              long start,
350                                              long len,
351                                              long fileOffsetInHar) {
352        // offset 1 past last byte of desired range
353        long end = start + len;
354    
355        for (BlockLocation location : locations) {
356          // offset of part block relative to beginning of desired file
357          // (may be negative if file starts in this part block)
358          long harBlockStart = location.getOffset() - fileOffsetInHar;
359          // offset 1 past last byte of har block relative to beginning of
360          // desired file
361          long harBlockEnd = harBlockStart + location.getLength();
362          
363          if (start > harBlockStart) {
364            // desired range starts after beginning of this har block
365            // fix offset to beginning of relevant range (relative to desired file)
366            location.setOffset(start);
367            // fix length to relevant portion of har block
368            location.setLength(location.getLength() - (start - harBlockStart));
369          } else {
370            // desired range includes beginning of this har block
371            location.setOffset(harBlockStart);
372          }
373          
374          if (harBlockEnd > end) {
375            // range ends before end of this har block
376            // fix length to remove irrelevant portion at the end
377            location.setLength(location.getLength() - (harBlockEnd - end));
378          }
379        }
380        
381        return locations;
382      }
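
  // Worked example with illustrative numbers: the archived file starts at
  // offset 1000 of the part file (fileOffsetInHar = 1000), one part block
  // covers offsets [896, 1408), and the caller asked for start = 0, len = 300:
  //   harBlockStart = 896 - 1000 = -104, harBlockEnd = -104 + 512 = 408
  //   start(0) > harBlockStart(-104)  ->  offset = 0, length = 512 - 104 = 408
  //   harBlockEnd(408) > end(300)     ->  length = 408 - 108 = 300
  // so the block is reported as covering bytes [0, 300) of the archived file.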
383      
384      /**
385       * Get block locations from the underlying fs and fix their
386       * offsets and lengths.
387       * @param file the input filestatus to get block locations
388       * @param start the start of the desired range in the contained file
389       * @param len the length of the desired range
390       * @return block locations for this segment of file
391       * @throws IOException
392       */
393      @Override
394      public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
395                                                   long len) throws IOException {
396        HarStatus hstatus = getFileHarStatus(file.getPath());
397        Path partPath = new Path(archivePath, hstatus.getPartName());
398        FileStatus partStatus = metadata.getPartFileStatus(partPath);
399    
400        // get all part blocks that overlap with the desired file blocks
401        BlockLocation[] locations = 
402          fs.getFileBlockLocations(partStatus,
403                                   hstatus.getStartIndex() + start, len);
404    
405        return fixBlockLocations(locations, start, len, hstatus.getStartIndex());
406      }
407      
  /**
   * The hash of the path p inside
   * the har filesystem.
   * @param p the path in the har filesystem
   * @return the hash code of the path.
   */
414      public static int getHarHash(Path p) {
415        return (p.toString().hashCode() & 0x7fffffff);
416      }
417      
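  // One entry of the master index: the byte range [begin, end) in the
  // _index file together with the range of path hash codes whose index
  // lines fall inside that byte range.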
418      static class Store {
419        public Store() {
420          begin = end = startHash = endHash = 0;
421        }
422        public Store(long begin, long end, int startHash, int endHash) {
423          this.begin = begin;
424          this.end = end;
425          this.startHash = startHash;
426          this.endHash = endHash;
427        }
428        public long begin;
429        public long end;
430        public int startHash;
431        public int endHash;
432      }
433      
  /**
   * Get filestatuses of all the children of a given directory. This just
   * scans every entry in the parsed index to find the statuses of the
   * children of a directory. It is a brute force way of getting all such
   * filestatuses.
   *
   * @param parent
   *          the parent path directory
   * @param statuses
   *          the list to add the children filestatuses to
   * @param children
   *          the string list of children for this parent
   */
448      private void fileStatusesInIndex(HarStatus parent, List<FileStatus> statuses,
449          List<String> children) throws IOException {
450        String parentString = parent.getName();
451        if (!parentString.endsWith(Path.SEPARATOR)){
452            parentString += Path.SEPARATOR;
453        }
454        Path harPath = new Path(parentString);
455        int harlen = harPath.depth();
456        final Map<String, FileStatus> cache = new TreeMap<String, FileStatus>();
457    
458        for (HarStatus hstatus : metadata.archive.values()) {
459          String child = hstatus.getName();
460          if ((child.startsWith(parentString))) {
461            Path thisPath = new Path(child);
462            if (thisPath.depth() == harlen + 1) {
463              statuses.add(toFileStatus(hstatus, cache));
464            }
465          }
466        }
467      }
468    
469      /**
470       * Combine the status stored in the index and the underlying status. 
471       * @param h status stored in the index
472       * @param cache caching the underlying file statuses
473       * @return the combined file status
474       * @throws IOException
475       */
476      private FileStatus toFileStatus(HarStatus h,
477          Map<String, FileStatus> cache) throws IOException {
478        FileStatus underlying = null;
479        if (cache != null) {
480          underlying = cache.get(h.partName);
481        }
482        if (underlying == null) {
483          final Path p = h.isDir? archivePath: new Path(archivePath, h.partName);
484          underlying = fs.getFileStatus(p);
485          if (cache != null) {
486            cache.put(h.partName, underlying);
487          }
488        }
489    
490        long modTime = 0;
491        int version = metadata.getVersion();
492        if (version < 3) {
493          modTime = underlying.getModificationTime();
494        } else if (version == 3) {
495          modTime = h.getModificationTime();
496        }
497    
498        return new FileStatus(
499            h.isDir()? 0L: h.getLength(),
500            h.isDir(),
501            underlying.getReplication(),
502            underlying.getBlockSize(),
503            modTime,
504            underlying.getAccessTime(),
505            underlying.getPermission(),
506            underlying.getOwner(),
507            underlying.getGroup(),
508            makeRelative(this.uri.getPath(), new Path(h.name)));
509      }
510    
  // A parser for the status of an archived file or directory, stored
  // as a single line in the index files.
  // The format of a line is
  // filename "dir"/"file" partFileName startIndex length
  // <space separated children>
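  // Illustrative version-3 entries (field values made up; names and the
  // trailing property string are URL-encoded on disk):
  //   /dir1/file1 file part-0 0 1024 <encoded "modtime perm owner group">
  //   /dir1 dir <encoded "modtime perm owner group"> 0 0 file1 file2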
516      private class HarStatus {
517        boolean isDir;
518        String name;
519        List<String> children;
520        String partName;
521        long startIndex;
522        long length;
523        long modificationTime = 0;
524    
525        public HarStatus(String harString) throws UnsupportedEncodingException {
526          String[] splits = harString.split(" ");
527          this.name = decodeFileName(splits[0]);
      this.isDir = "dir".equals(splits[1]);
      // this is equal to "none" if it is a directory (versions 1 and 2)
      this.partName = splits[2];
531          this.startIndex = Long.parseLong(splits[3]);
532          this.length = Long.parseLong(splits[4]);
533    
534          int version = metadata.getVersion();
535          String[] propSplits = null;
536          // propSplits is used to retrieve the metainformation that Har versions
537          // 1 & 2 missed (modification time, permission, owner group).
538          // These fields are stored in an encoded string placed in different
539          // locations depending on whether it's a file or directory entry.
540          // If it's a directory, the string will be placed at the partName
541          // location (directories have no partName because they don't have data
542          // to be stored). This is done because the number of fields in a
543          // directory entry is unbounded (all children are listed at the end)
544          // If it's a file, the string will be the last field.
545          if (isDir) {
546            if (version == 3){
547              propSplits = decodeString(this.partName).split(" ");
548            }
549            children = new ArrayList<String>();
550            for (int i = 5; i < splits.length; i++) {
551              children.add(decodeFileName(splits[i]));
552            }
553          } else if (version == 3) {
554            propSplits = decodeString(splits[5]).split(" ");
555          }
556    
557          if (propSplits != null && propSplits.length >= 4) {
558            modificationTime = Long.parseLong(propSplits[0]);
559            // the fields below are stored in the file but are currently not used
560            // by HarFileSystem
561            // permission = new FsPermission(Short.parseShort(propSplits[1]));
562            // owner = decodeString(propSplits[2]);
563            // group = decodeString(propSplits[3]);
564          }
565        }
566        public boolean isDir() {
567          return isDir;
568        }
569        
570        public String getName() {
571          return name;
572        }
573        
574        public List<String> getChildren() {
575          return children;
576        }
577        public String getFileName() {
578          return name;
579        }
580        public String getPartName() {
581          return partName;
582        }
583        public long getStartIndex() {
584          return startIndex;
585        }
586        public long getLength() {
587          return length;
588        }
589        public long getModificationTime() {
590          return modificationTime;
591        }
592      }
593      
  /**
   * Return the filestatus of a file in the har archive.
   * The permissions returned are those of the archive
   * index files; permissions are not preserved
   * while creating a hadoop archive.
   * @param f the path in har filesystem
   * @return filestatus.
   * @throws IOException
   */
603      @Override
604      public FileStatus getFileStatus(Path f) throws IOException {
605        HarStatus hstatus = getFileHarStatus(f);
606        return toFileStatus(hstatus, null);
607      }
608    
609      private HarStatus getFileHarStatus(Path f) throws IOException {
610        // get the fs DataInputStream for the underlying file
611        // look up the index.
612        Path p = makeQualified(f);
613        Path harPath = getPathInHar(p);
614        if (harPath == null) {
615          throw new IOException("Invalid file name: " + f + " in " + uri);
616        }
617        HarStatus hstatus = metadata.archive.get(harPath);
618        if (hstatus == null) {
619          throw new FileNotFoundException("File: " +  f + " does not exist in " + uri);
620        }
621        return hstatus;
622      }
623    
624      /**
625       * @return null since no checksum algorithm is implemented.
626       */
627      public FileChecksum getFileChecksum(Path f) {
628        return null;
629      }
630    
  /**
   * Returns a har input stream which fakes end of
   * file. It looks up the index to get the part
   * file name and the start and length of the file.
   */
636      @Override
637      public FSDataInputStream open(Path f, int bufferSize) throws IOException {
638        // get the fs DataInputStream for the underlying file
639        HarStatus hstatus = getFileHarStatus(f);
640        // we got it.. woo hooo!!! 
641        if (hstatus.isDir()) {
642          throw new FileNotFoundException(f + " : not a file in " +
643                    archivePath);
644        }
645        return new HarFSDataInputStream(fs, new Path(archivePath, 
646            hstatus.getPartName()),
647            hstatus.getStartIndex(), hstatus.getLength(), bufferSize);
648      }
649     
  /*
   * create throws an exception in the Har filesystem.
   * Once created, the archive cannot be changed.
   */
654      public FSDataOutputStream create(Path f, int bufferSize) 
655                                        throws IOException {
656        throw new IOException("Har: Create not allowed");
657      }
658      
659      public FSDataOutputStream create(Path f,
660          FsPermission permission,
661          boolean overwrite,
662          int bufferSize,
663          short replication,
664          long blockSize,
665          Progressable progress) throws IOException {
666        throw new IOException("Har: create not allowed.");
667      }
668      
669      @Override
670      public void close() throws IOException {
671        if (fs != null) {
672          try {
673            fs.close();
674          } catch(IOException ie) {
675            //this might already be closed
676            // ignore
677          }
678        }
679      }
680      
681      /**
682       * Not implemented.
683       */
684      @Override
685      public boolean setReplication(Path src, short replication) throws IOException{
686        throw new IOException("Har: setreplication not allowed");
687      }
688      
689      /**
690       * Not implemented.
691       */
692      @Override
693      public boolean delete(Path f, boolean recursive) throws IOException { 
694        throw new IOException("Har: delete not allowed");
695      }
696      
  /**
   * listStatus returns the children of a directory
   * by looking them up in the parsed index.
   */
701      @Override
702      public FileStatus[] listStatus(Path f) throws IOException {
703        //need to see if the file is an index in file
704        //get the filestatus of the archive directory
705        // we will create fake filestatuses to return
706        // to the client
707        List<FileStatus> statuses = new ArrayList<FileStatus>();
708        Path tmpPath = makeQualified(f);
709        Path harPath = getPathInHar(tmpPath);
710        HarStatus hstatus = metadata.archive.get(harPath);
711        if (hstatus == null) {
712          throw new FileNotFoundException("File " + f + " not found in " + archivePath);
713        }
714        if (hstatus.isDir()) {
715          fileStatusesInIndex(hstatus, statuses, hstatus.children);
716        } else {
717          statuses.add(toFileStatus(hstatus, null));
718        }
719        
720        return statuses.toArray(new FileStatus[statuses.size()]);
721      }
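
  // Illustrative call (paths hypothetical):
  //   FileStatus[] children =
  //       harFs.listStatus(new Path("har://hdfs-namenode:8020/user/alice/logs.har/2011"));
  // returns the archived entries under /2011 with lengths taken from the index.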
722      
  /**
   * Return the top level archive path as the home directory.
   */
726      public Path getHomeDirectory() {
727        return new Path(uri.toString());
728      }
729      
730      public void setWorkingDirectory(Path newDir) {
731        //does nothing.
732      }
733      
734      /**
735       * not implemented.
736       */
737      public boolean mkdirs(Path f, FsPermission permission) throws IOException {
738        throw new IOException("Har: mkdirs not allowed");
739      }
740      
741      /**
742       * not implemented.
743       */
744      public void copyFromLocalFile(boolean delSrc, Path src, Path dst) throws 
745            IOException {
746        throw new IOException("Har: copyfromlocalfile not allowed");
747      }
748      
749      /**
750       * copies the file in the har filesystem to a local file.
751       */
752      public void copyToLocalFile(boolean delSrc, Path src, Path dst) 
753        throws IOException {
754        FileUtil.copy(this, src, getLocal(getConf()), dst, false, getConf());
755      }
756      
757      /**
758       * not implemented.
759       */
760      public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile) 
761        throws IOException {
762        throw new IOException("Har: startLocalOutput not allowed");
763      }
764      
765      /**
766       * not implemented.
767       */
768      public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile) 
769        throws IOException {
770        throw new IOException("Har: completeLocalOutput not allowed");
771      }
772      
773      /**
774       * not implemented.
775       */
776      public void setOwner(Path p, String username, String groupname)
777        throws IOException {
778        throw new IOException("Har: setowner not allowed");
779      }
780    
781      /**
782       * Not implemented.
783       */
784      public void setPermission(Path p, FsPermission permisssion) 
785        throws IOException {
786        throw new IOException("Har: setPermission not allowed");
787      }
788      
789      /**
790       * Hadoop archives input stream. This input stream fakes EOF 
791       * since archive files are part of bigger part files.
792       */
793      private static class HarFSDataInputStream extends FSDataInputStream {
794        /**
795         * Create an input stream that fakes all the reads/positions/seeking.
796         */
797        private static class HarFsInputStream extends FSInputStream {
798          private long position, start, end;
799          //The underlying data input stream that the
800          // underlying filesystem will return.
801          private FSDataInputStream underLyingStream;
802          //one byte buffer
803          private byte[] oneBytebuff = new byte[1];
804          HarFsInputStream(FileSystem fs, Path path, long start,
805              long length, int bufferSize) throws IOException {
806            underLyingStream = fs.open(path, bufferSize);
807            underLyingStream.seek(start);
808            // the start of this file in the part file
809            this.start = start;
810            // the position pointer in the part file
811            this.position = start;
812            // the end pointer in the part file
813            this.end = start + length;
814          }
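
      // All positions exposed by this stream are relative to the archived
      // file: an external position pos corresponds to start + pos in the
      // underlying part file, and reads are never allowed past end.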
815          
816          public synchronized int available() throws IOException {
817            long remaining = end - underLyingStream.getPos();
818            if (remaining > (long)Integer.MAX_VALUE) {
819              return Integer.MAX_VALUE;
820            }
821            return (int) remaining;
822          }
823          
824          public synchronized  void close() throws IOException {
825            underLyingStream.close();
826            super.close();
827          }
828          
829          //not implemented
830          @Override
831          public void mark(int readLimit) {
832            // do nothing 
833          }
834          
835          /**
836           * reset is not implemented
837           */
838          public void reset() throws IOException {
839            throw new IOException("reset not implemented.");
840          }
841          
842          public synchronized int read() throws IOException {
843            int ret = read(oneBytebuff, 0, 1);
844            return (ret <= 0) ? -1: (oneBytebuff[0] & 0xff);
845          }
846          
      public synchronized int read(byte[] b) throws IOException {
        // position is already advanced inside read(b, offset, len);
        // advancing it again here would double-count the bytes read
        return read(b, 0, b.length);
      }
854          
      /**
       * Read at most len bytes, truncating the request so that it never
       * goes past the faked end of file.
       */
      public synchronized int read(byte[] b, int offset, int len)
        throws IOException {
        int newlen = len;
        int ret = -1;
        if (position + len > end) {
          newlen = (int) (end - position);
        }
        // end case
        if (newlen == 0)
          return ret;
        ret = underLyingStream.read(b, offset, newlen);
        if (ret > 0) {
          // do not move the position backwards if the underlying
          // stream reported EOF (-1)
          position += ret;
        }
        return ret;
      }
872          
873          public synchronized long skip(long n) throws IOException {
874            long tmpN = n;
875            if (tmpN > 0) {
876              if (position + tmpN > end) {
877                tmpN = end - position;
878              }
879              underLyingStream.seek(tmpN + position);
880              position += tmpN;
881              return tmpN;
882            }
883            return (tmpN < 0)? -1 : 0;
884          }
885          
886          public synchronized long getPos() throws IOException {
887            return (position - start);
888          }
889          
890          public synchronized void seek(long pos) throws IOException {
891            if (pos < 0 || (start + pos > end)) {
892              throw new IOException("Failed to seek: EOF");
893            }
894            position = start + pos;
895            underLyingStream.seek(position);
896          }
897    
898          public boolean seekToNewSource(long targetPos) throws IOException {
899            //do not need to implement this
900            // hdfs in itself does seektonewsource 
901            // while reading.
902            return false;
903          }
904          
905          /**
906           * implementing position readable. 
907           */
908          public int read(long pos, byte[] b, int offset, int length) 
909          throws IOException {
910            int nlength = length;
911            if (start + nlength + pos > end) {
912              nlength = (int) (end - (start + pos));
913            }
914            return underLyingStream.read(pos + start , b, offset, nlength);
915          }
916          
917          /**
918           * position readable again.
919           */
920          public void readFully(long pos, byte[] b, int offset, int length) 
921          throws IOException {
922            if (start + length + pos > end) {
923              throw new IOException("Not enough bytes to read.");
924            }
925            underLyingStream.readFully(pos + start, b, offset, length);
926          }
927          
928          public void readFully(long pos, byte[] b) throws IOException {
929              readFully(pos, b, 0, b.length);
930          }
931          
932        }
933      
    /**
     * Constructor for har input stream.
     * @param fs the underlying filesystem
     * @param p the path in the underlying filesystem
     * @param start the start position in the part file
     * @param length the length of valid data in the part file
     * @param bufsize the buffer size
     * @throws IOException
     */
943        public HarFSDataInputStream(FileSystem fs, Path  p, long start, 
944            long length, int bufsize) throws IOException {
945            super(new HarFsInputStream(fs, p, start, length, bufsize));
946        }
947    
948        /**
949         * constructor for har input stream.
950         * @param fs the underlying filesystem
951         * @param p the path in the underlying file system
952         * @param start the start position in the part file
953         * @param length the length of valid data in the part file.
954         * @throws IOException
955         */
956        public HarFSDataInputStream(FileSystem fs, Path  p, long start, long length)
957          throws IOException {
958            super(new HarFsInputStream(fs, p, start, length, 0));
959        }
960      }
961    
962      private class HarMetaData {
963        private FileSystem fs;
964        private int version;
965        // the masterIndex of the archive
966        private Path masterIndexPath;
967        // the index file 
968        private Path archiveIndexPath;
969    
970        private long masterIndexTimestamp;
971        private long archiveIndexTimestamp;
972    
973        List<Store> stores = new ArrayList<Store>();
974        Map<Path, HarStatus> archive = new HashMap<Path, HarStatus>();
975        private Map<Path, FileStatus> partFileStatuses = new HashMap<Path, FileStatus>();
976    
977        public HarMetaData(FileSystem fs, Path masterIndexPath, Path archiveIndexPath) {
978          this.fs = fs;
979          this.masterIndexPath = masterIndexPath;
980          this.archiveIndexPath = archiveIndexPath;
981        }
982    
983        public FileStatus getPartFileStatus(Path partPath) throws IOException {
984          FileStatus status;
985          status = partFileStatuses.get(partPath);
986          if (status == null) {
987            status = fs.getFileStatus(partPath);
988            partFileStatuses.put(partPath, status);
989          }
990          return status;
991        }
992    
993        public long getMasterIndexTimestamp() {
994          return masterIndexTimestamp;
995        }
996    
997        public long getArchiveIndexTimestamp() {
998          return archiveIndexTimestamp;
999        }
1000    
1001        private int getVersion() {
1002          return version;
1003        }
1004    
1005        private void parseMetaData() throws IOException {
1006          FSDataInputStream in = fs.open(masterIndexPath);
1007          FileStatus masterStat = fs.getFileStatus(masterIndexPath);
1008          masterIndexTimestamp = masterStat.getModificationTime();
1009          LineReader lin = new LineReader(in, getConf());
1010          Text line = new Text();
1011          long read = lin.readLine(line);
1012    
      // the first line contains the version of the index file
      String versionLine = line.toString();
      String[] arr = versionLine.split(" ");
      version = Integer.parseInt(arr[0]);
      // stay backwards-compatible: refuse archives written by a newer version
      if (this.version > HarFileSystem.VERSION) {
        throw new IOException("Invalid version " +
            this.version + " expected " + HarFileSystem.VERSION);
      }
1022    
      // each remaining line contains a hash code range and the byte range
      // of the corresponding block of entries in the _index file
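      // An illustrative _masterindex (values made up) looks like:
      //   3
      //   0 1073741823 0 1084
      //   1073741824 2147483647 1084 2312
      // i.e. a version line followed by "startHash endHash begin end" lines.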
1024          String[] readStr = null;
1025          while(read < masterStat.getLen()) {
1026            int b = lin.readLine(line);
1027            read += b;
1028            readStr = line.toString().split(" ");
1029            int startHash = Integer.parseInt(readStr[0]);
1030            int endHash  = Integer.parseInt(readStr[1]);
1031            stores.add(new Store(Long.parseLong(readStr[2]), 
1032                Long.parseLong(readStr[3]), startHash,
1033                endHash));
1034            line.clear();
1035          }
1036          try {
1037            // close the master index
1038            lin.close();
1039          } catch(IOException io){
1040            // do nothing just a read.
1041          }
1042    
1043          FSDataInputStream aIn = fs.open(archiveIndexPath);
1044          FileStatus archiveStat = fs.getFileStatus(archiveIndexPath);
1045          archiveIndexTimestamp = archiveStat.getModificationTime();
1046          LineReader aLin;
1047    
1048          // now start reading the real index file
1049          for (Store s: stores) {
1050            read = 0;
1051            aIn.seek(s.begin);
1052            aLin = new LineReader(aIn, getConf());
1053            while (read + s.begin < s.end) {
1054              int tmp = aLin.readLine(line);
1055              read += tmp;
1056              String lineFeed = line.toString();
1057              String[] parsed = lineFeed.split(" ");
1058              parsed[0] = decodeFileName(parsed[0]);
1059              archive.put(new Path(parsed[0]), new HarStatus(lineFeed));
1060              line.clear();
1061            }
1062          }
1063          try {
1064            // close the archive index
1065            aIn.close();
1066          } catch(IOException io) {
1067            // do nothing just a read.
1068          }
1069        }
1070      }
1071    }