001 /**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 package org.apache.hadoop.fs;
019
020 import java.io.FileNotFoundException;
021 import java.io.IOException;
022 import java.io.UnsupportedEncodingException;
023 import java.net.URI;
024 import java.net.URISyntaxException;
025 import java.net.URLDecoder;
026 import java.util.ArrayList;
027 import java.util.EnumSet;
028 import java.util.List;
029 import java.util.Map;
030 import java.util.TreeMap;
031 import java.util.HashMap;
032
033 import org.apache.hadoop.conf.Configuration;
034 import org.apache.hadoop.fs.permission.FsPermission;
035 import org.apache.hadoop.io.Text;
036 import org.apache.hadoop.util.LineReader;
037 import org.apache.hadoop.util.Progressable;
038
039 /**
 * This is an implementation of the Hadoop Archive
 * filesystem. A Hadoop Archive (HAR) stores its metadata in
 * index files named _masterindex and _index, and its data in
 * files named part-*. The index files record where each
 * archived file lives inside the part files. The _index file
 * is sorted by the hash code of the paths it contains, and the
 * master index adds a level of indirection: it maps ranges of
 * hash codes to positions in the _index file so that lookups
 * do not have to scan the whole index.
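 * <p>
 * A minimal usage sketch (the host name and paths are hypothetical):
 * <pre>{@code
 * Configuration conf = new Configuration();
 * Path inHar = new Path("har://hdfs-namenode:8020/user/alice/logs.har/2008/syslog");
 * FileSystem harFs = inHar.getFileSystem(conf);
 * FSDataInputStream in = harFs.open(inHar);  // read the archived file's bytes
 * in.close();
 * }</pre>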
050 */
051
052 public class HarFileSystem extends FilterFileSystem {
053 public static final int VERSION = 3;
054
055 private static final Map<URI, HarMetaData> harMetaCache = new HashMap<URI, HarMetaData>();
056
057 // uri representation of this Har filesystem
058 private URI uri;
059 // the top level path of the archive
060 // in the underlying file system
061 private Path archivePath;
062 // the har auth
063 private String harAuth;
064
065 // pointer into the static metadata cache
066 private HarMetaData metadata;
067
  /**
   * Public no-args constructor for the Har filesystem.
   */
072 public HarFileSystem() {
073 }
074
075 /**
076 * Return the protocol scheme for the FileSystem.
   *
079 * @return <code>har</code>
080 */
081 @Override
082 public String getScheme() {
083 return "har";
084 }
085
  /**
   * Constructor to create a HarFileSystem with an
   * underlying filesystem.
   * @param fs the underlying filesystem
   */
091 public HarFileSystem(FileSystem fs) {
092 super(fs);
093 }
094
095 /**
   * Initialize a Har filesystem per har archive. The
   * archive home directory is the top level directory
   * in the underlying filesystem that contains the HAR archive.
   * Be careful with this method: you do not want to go on
   * creating new FileSystem instances per call to
   * path.getFileSystem().
   * The URI of a Har path is either
   * har://underlyingfsscheme-host:port/archivepath
   * or
   * har:///archivepath, in which case the default underlying
   * filesystem from the configuration is used.
107 */
108 public void initialize(URI name, Configuration conf) throws IOException {
109 // decode the name
110 URI underLyingURI = decodeHarURI(name, conf);
111 // we got the right har Path- now check if this is
112 // truly a har filesystem
113 Path harPath = archivePath(
114 new Path(name.getScheme(), name.getAuthority(), name.getPath()));
115 if (harPath == null) {
116 throw new IOException("Invalid path for the Har Filesystem. " +
117 name.toString());
118 }
119 if (fs == null) {
120 fs = FileSystem.get(underLyingURI, conf);
121 }
122 uri = harPath.toUri();
123 archivePath = new Path(uri.getPath());
124 harAuth = getHarAuth(underLyingURI);
125 //check for the underlying fs containing
126 // the index file
127 Path masterIndexPath = new Path(archivePath, "_masterindex");
128 Path archiveIndexPath = new Path(archivePath, "_index");
129 if (!fs.exists(masterIndexPath) || !fs.exists(archiveIndexPath)) {
130 throw new IOException("Invalid path for the Har Filesystem. " +
131 "No index file in " + harPath);
132 }
133
134 metadata = harMetaCache.get(uri);
135 if (metadata != null) {
136 FileStatus mStat = fs.getFileStatus(masterIndexPath);
137 FileStatus aStat = fs.getFileStatus(archiveIndexPath);
138 if (mStat.getModificationTime() != metadata.getMasterIndexTimestamp() ||
139 aStat.getModificationTime() != metadata.getArchiveIndexTimestamp()) {
140 // the archive has been overwritten since we last read it
141 // remove the entry from the meta data cache
142 metadata = null;
143 harMetaCache.remove(uri);
144 }
145 }
146 if (metadata == null) {
147 metadata = new HarMetaData(fs, masterIndexPath, archiveIndexPath);
148 metadata.parseMetaData();
149 harMetaCache.put(uri, metadata);
150 }
151 }
152
  // get the version of the filesystem from the masterindex file
  // the version is currently not useful since it's the first version
  // of archives
156 public int getHarVersion() throws IOException {
157 if (metadata != null) {
158 return metadata.getVersion();
159 }
160 else {
161 throw new IOException("Invalid meta data for the Har Filesystem");
162 }
163 }
164
165 /*
   * Find the ancestor of p that is the archive root, i.e. the
   * deepest prefix of p whose last segment ends with ".har".
   * Returns null if no such prefix exists.
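   * For example (hypothetical path), for /user/alice/logs.har/2008/syslog
   * this returns /user/alice/logs.har.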
170 */
171 private Path archivePath(Path p) {
172 Path retPath = null;
173 Path tmp = p;
174 for (int i=0; i< p.depth(); i++) {
175 if (tmp.toString().endsWith(".har")) {
176 retPath = tmp;
177 break;
178 }
179 tmp = tmp.getParent();
180 }
181 return retPath;
182 }
183
184 /**
   * Decode the raw HAR URI to get the URI of the underlying filesystem.
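   * For example (hypothetical host and path),
   * har://hdfs-namenode:8020/user/alice/logs.har decodes to
   * hdfs://namenode:8020/user/alice/logs.har.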
   * @param rawURI raw Har URI
   * @param conf the configuration used to resolve the default filesystem
   * @return URI of the underlying filesystem
188 */
189 private URI decodeHarURI(URI rawURI, Configuration conf) throws IOException {
190 String tmpAuth = rawURI.getAuthority();
    // no authority was given, so we are using the default
    // filesystem from the config; just return its URI
195 if (tmpAuth == null) {
196 //create a path
197 return FileSystem.getDefaultUri(conf);
198 }
199 String host = rawURI.getHost();
200 if (host == null) {
201 throw new IOException("URI: " + rawURI
202 + " is an invalid Har URI since host==null."
203 + " Expecting har://<scheme>-<host>/<path>.");
204 }
205 int i = host.indexOf('-');
206 if (i < 0) {
207 throw new IOException("URI: " + rawURI
208 + " is an invalid Har URI since '-' not found."
209 + " Expecting har://<scheme>-<host>/<path>.");
210 }
211 final String underLyingScheme = host.substring(0, i);
212 i++;
213 final String underLyingHost = i == host.length()? null: host.substring(i);
214 int underLyingPort = rawURI.getPort();
215 String auth = (underLyingHost == null && underLyingPort == -1)?
216 null:(underLyingHost+
217 (underLyingPort == -1 ? "" : ":"+underLyingPort));
218 URI tmp = null;
219 if (rawURI.getQuery() != null) {
220 // query component not allowed
221 throw new IOException("query component in Path not supported " + rawURI);
222 }
223 try {
224 tmp = new URI(underLyingScheme, auth, rawURI.getPath(),
225 rawURI.getQuery(), rawURI.getFragment());
226 } catch (URISyntaxException e) {
      // should not happen; the components come from an already valid URI
228 }
229 return tmp;
230 }
231
232 private static String decodeString(String str)
233 throws UnsupportedEncodingException {
234 return URLDecoder.decode(str, "UTF-8");
235 }
236
237 private String decodeFileName(String fname)
238 throws UnsupportedEncodingException {
239 int version = metadata.getVersion();
240 if (version == 2 || version == 3){
241 return decodeString(fname);
242 }
243 return fname;
244 }
245
246 /**
   * Return the top level archive path as the working directory.
248 */
249 public Path getWorkingDirectory() {
250 return new Path(uri.toString());
251 }
252
253 /**
   * Create a har-specific authority of the form
   * underlyingscheme-host:port.
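   * For example (hypothetical), hdfs://namenode:8020 yields the
   * authority hdfs-namenode:8020.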
   * @param underLyingUri the URI of the underlying filesystem
258 * @return har specific auth
259 */
260 private String getHarAuth(URI underLyingUri) {
261 String auth = underLyingUri.getScheme() + "-";
262 if (underLyingUri.getHost() != null) {
263 auth += underLyingUri.getHost() + ":";
264 if (underLyingUri.getPort() != -1) {
265 auth += underLyingUri.getPort();
266 }
267 }
268 else {
269 auth += ":";
270 }
271 return auth;
272 }
273
274 /**
275 * Returns the uri of this filesystem.
276 * The uri is of the form
   * har://underlyingfsscheme-host:port/pathintheunderlyingfs
278 */
279 @Override
280 public URI getUri() {
281 return this.uri;
282 }
283
284 /**
   * Return the path inside the har filesystem: the given path
   * made relative to the archive root, with a leading separator.
   * Returns null if the path is not under the archive.
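   * For example (hypothetical paths), with the archive at /user/alice/logs.har,
   * the path /user/alice/logs.har/2008/syslog maps to /2008/syslog.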
289 * @param path the fully qualified path in the har filesystem.
   * @return the relative path inside the har filesystem, or null.
291 */
292 private Path getPathInHar(Path path) {
293 Path harPath = new Path(path.toUri().getPath());
294 if (archivePath.compareTo(harPath) == 0)
295 return new Path(Path.SEPARATOR);
296 Path tmp = new Path(harPath.getName());
297 Path parent = harPath.getParent();
298 while (!(parent.compareTo(archivePath) == 0)) {
299 if (parent.toString().equals(Path.SEPARATOR)) {
300 tmp = null;
301 break;
302 }
303 tmp = new Path(parent.getName(), tmp);
304 parent = parent.getParent();
305 }
306 if (tmp != null)
307 tmp = new Path(Path.SEPARATOR, tmp);
308 return tmp;
309 }
310
  // Rebuild the fully qualified har path for p: prefix it with the
  // har scheme, authority, and the archive path (initial). Use the
  // Path API rather than string manipulation to do it.
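  // For example (hypothetical values), makeRelative("/user/alice/logs.har",
  // new Path("/2008/syslog")) yields
  // har://hdfs-namenode:8020/user/alice/logs.har/2008/syslog.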
315 private Path makeRelative(String initial, Path p) {
316 String scheme = this.uri.getScheme();
317 String authority = this.uri.getAuthority();
318 Path root = new Path(Path.SEPARATOR);
319 if (root.compareTo(p) == 0)
320 return new Path(scheme, authority, initial);
321 Path retPath = new Path(p.getName());
322 Path parent = p.getParent();
323 for (int i=0; i < p.depth()-1; i++) {
324 retPath = new Path(parent.getName(), retPath);
325 parent = parent.getParent();
326 }
327 return new Path(new Path(scheme, authority, initial),
328 retPath.toString());
329 }
330
331 /* this makes a path qualified in the har filesystem
332 * (non-Javadoc)
333 * @see org.apache.hadoop.fs.FilterFileSystem#makeQualified(
334 * org.apache.hadoop.fs.Path)
335 */
336 @Override
337 public Path makeQualified(Path path) {
338 // make sure that we just get the
339 // path component
340 Path fsPath = path;
341 if (!path.isAbsolute()) {
342 fsPath = new Path(archivePath, path);
343 }
344
345 URI tmpURI = fsPath.toUri();
346 //change this to Har uri
347 return new Path(uri.getScheme(), harAuth, tmpURI.getPath());
348 }
349
350 /**
351 * Fix offset and length of block locations.
352 * Note that this method modifies the original array.
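   * <p>
   * A worked example with hypothetical numbers: if the archived file begins
   * at byte 100 of the part file (fileOffsetInHar = 100), the part file has
   * one block at offset 0 with length 512, and the caller asks for start = 0,
   * len = 200, then the block is rewritten to offset 0 and length 200,
   * both relative to the archived file.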
353 * @param locations block locations of har part file
354 * @param start the start of the desired range in the contained file
355 * @param len the length of the desired range
356 * @param fileOffsetInHar the offset of the desired file in the har part file
357 * @return block locations with fixed offset and length
358 */
359 static BlockLocation[] fixBlockLocations(BlockLocation[] locations,
360 long start,
361 long len,
362 long fileOffsetInHar) {
363 // offset 1 past last byte of desired range
364 long end = start + len;
365
366 for (BlockLocation location : locations) {
367 // offset of part block relative to beginning of desired file
368 // (may be negative if file starts in this part block)
369 long harBlockStart = location.getOffset() - fileOffsetInHar;
370 // offset 1 past last byte of har block relative to beginning of
371 // desired file
372 long harBlockEnd = harBlockStart + location.getLength();
373
374 if (start > harBlockStart) {
375 // desired range starts after beginning of this har block
376 // fix offset to beginning of relevant range (relative to desired file)
377 location.setOffset(start);
378 // fix length to relevant portion of har block
379 location.setLength(location.getLength() - (start - harBlockStart));
380 } else {
381 // desired range includes beginning of this har block
382 location.setOffset(harBlockStart);
383 }
384
385 if (harBlockEnd > end) {
386 // range ends before end of this har block
387 // fix length to remove irrelevant portion at the end
388 location.setLength(location.getLength() - (harBlockEnd - end));
389 }
390 }
391
392 return locations;
393 }
394
395 /**
396 * Get block locations from the underlying fs and fix their
397 * offsets and lengths.
398 * @param file the input filestatus to get block locations
399 * @param start the start of the desired range in the contained file
400 * @param len the length of the desired range
401 * @return block locations for this segment of file
402 * @throws IOException
403 */
404 @Override
405 public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
406 long len) throws IOException {
407 HarStatus hstatus = getFileHarStatus(file.getPath());
408 Path partPath = new Path(archivePath, hstatus.getPartName());
409 FileStatus partStatus = metadata.getPartFileStatus(partPath);
410
411 // get all part blocks that overlap with the desired file blocks
412 BlockLocation[] locations =
413 fs.getFileBlockLocations(partStatus,
414 hstatus.getStartIndex() + start, len);
415
416 return fixBlockLocations(locations, start, len, hstatus.getStartIndex());
417 }
418
419 /**
   * the hash of the path p inside the har
   * filesystem
422 * @param p the path in the harfilesystem
423 * @return the hash code of the path.
424 */
425 public static int getHarHash(Path p) {
426 return (p.toString().hashCode() & 0x7fffffff);
427 }
428
429 static class Store {
430 public Store() {
431 begin = end = startHash = endHash = 0;
432 }
433 public Store(long begin, long end, int startHash, int endHash) {
434 this.begin = begin;
435 this.end = end;
436 this.startHash = startHash;
437 this.endHash = endHash;
438 }
439 public long begin;
440 public long end;
441 public int startHash;
442 public int endHash;
443 }
444
445 /**
   * Get filestatuses of all the children of a given directory. This just
   * reads through the index file, line by line, to get all statuses for
   * children of a directory. It is a brute force way of getting all such
   * filestatuses.
   *
   * @param parent
   *          the parent path directory
   * @param statuses
   *          the list to add the children filestatuses to
   * @param children
   *          the string list of children for this parent
458 */
459 private void fileStatusesInIndex(HarStatus parent, List<FileStatus> statuses,
460 List<String> children) throws IOException {
461 String parentString = parent.getName();
462 if (!parentString.endsWith(Path.SEPARATOR)){
463 parentString += Path.SEPARATOR;
464 }
465 Path harPath = new Path(parentString);
466 int harlen = harPath.depth();
467 final Map<String, FileStatus> cache = new TreeMap<String, FileStatus>();
468
469 for (HarStatus hstatus : metadata.archive.values()) {
470 String child = hstatus.getName();
471 if ((child.startsWith(parentString))) {
472 Path thisPath = new Path(child);
473 if (thisPath.depth() == harlen + 1) {
474 statuses.add(toFileStatus(hstatus, cache));
475 }
476 }
477 }
478 }
479
480 /**
481 * Combine the status stored in the index and the underlying status.
482 * @param h status stored in the index
483 * @param cache caching the underlying file statuses
484 * @return the combined file status
485 * @throws IOException
486 */
487 private FileStatus toFileStatus(HarStatus h,
488 Map<String, FileStatus> cache) throws IOException {
489 FileStatus underlying = null;
490 if (cache != null) {
491 underlying = cache.get(h.partName);
492 }
493 if (underlying == null) {
494 final Path p = h.isDir? archivePath: new Path(archivePath, h.partName);
495 underlying = fs.getFileStatus(p);
496 if (cache != null) {
497 cache.put(h.partName, underlying);
498 }
499 }
500
501 long modTime = 0;
502 int version = metadata.getVersion();
503 if (version < 3) {
504 modTime = underlying.getModificationTime();
505 } else if (version == 3) {
506 modTime = h.getModificationTime();
507 }
508
509 return new FileStatus(
510 h.isDir()? 0L: h.getLength(),
511 h.isDir(),
512 underlying.getReplication(),
513 underlying.getBlockSize(),
514 modTime,
515 underlying.getAccessTime(),
516 underlying.getPermission(),
517 underlying.getOwner(),
518 underlying.getGroup(),
519 makeRelative(this.uri.getPath(), new Path(h.name)));
520 }
521
  // A parser for a hadoop archive status stored as a single line
  // in the index files. The line format is
  //   filename "dir"/"file" partFileName startIndex length
  //   <space separated children>
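  // Hypothetical version-3 entries (names and property strings are
  // URL-encoded; a directory carries its encoded properties in the
  // partName column and lists its children at the end, while a file
  // carries them as a trailing field):
  //   %2F2008 dir 1285370400000+493+alice+hadoop 0 0 syslog
  //   %2F2008%2Fsyslog file part-0 0 1024 1285370400000+420+alice+hadoop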
527 private class HarStatus {
528 boolean isDir;
529 String name;
530 List<String> children;
531 String partName;
532 long startIndex;
533 long length;
534 long modificationTime = 0;
535
536 public HarStatus(String harString) throws UnsupportedEncodingException {
537 String[] splits = harString.split(" ");
538 this.name = decodeFileName(splits[0]);
      this.isDir = "dir".equals(splits[1]);
      // this is equal to "none" if it's a directory
541 this.partName = splits[2];
542 this.startIndex = Long.parseLong(splits[3]);
543 this.length = Long.parseLong(splits[4]);
544
545 int version = metadata.getVersion();
546 String[] propSplits = null;
547 // propSplits is used to retrieve the metainformation that Har versions
548 // 1 & 2 missed (modification time, permission, owner group).
549 // These fields are stored in an encoded string placed in different
550 // locations depending on whether it's a file or directory entry.
551 // If it's a directory, the string will be placed at the partName
552 // location (directories have no partName because they don't have data
553 // to be stored). This is done because the number of fields in a
554 // directory entry is unbounded (all children are listed at the end)
555 // If it's a file, the string will be the last field.
556 if (isDir) {
557 if (version == 3){
558 propSplits = decodeString(this.partName).split(" ");
559 }
560 children = new ArrayList<String>();
561 for (int i = 5; i < splits.length; i++) {
562 children.add(decodeFileName(splits[i]));
563 }
564 } else if (version == 3) {
565 propSplits = decodeString(splits[5]).split(" ");
566 }
567
568 if (propSplits != null && propSplits.length >= 4) {
569 modificationTime = Long.parseLong(propSplits[0]);
570 // the fields below are stored in the file but are currently not used
571 // by HarFileSystem
572 // permission = new FsPermission(Short.parseShort(propSplits[1]));
573 // owner = decodeString(propSplits[2]);
574 // group = decodeString(propSplits[3]);
575 }
576 }
577 public boolean isDir() {
578 return isDir;
579 }
580
581 public String getName() {
582 return name;
583 }
584
585 public List<String> getChildren() {
586 return children;
587 }
588 public String getFileName() {
589 return name;
590 }
591 public String getPartName() {
592 return partName;
593 }
594 public long getStartIndex() {
595 return startIndex;
596 }
597 public long getLength() {
598 return length;
599 }
600 public long getModificationTime() {
601 return modificationTime;
602 }
603 }
604
605 /**
   * return the filestatus of files in har archive.
   * The permissions returned are those of the archive
   * index files. The permissions are not persisted
   * while creating a hadoop archive.
610 * @param f the path in har filesystem
611 * @return filestatus.
612 * @throws IOException
613 */
614 @Override
615 public FileStatus getFileStatus(Path f) throws IOException {
616 HarStatus hstatus = getFileHarStatus(f);
617 return toFileStatus(hstatus, null);
618 }
619
620 private HarStatus getFileHarStatus(Path f) throws IOException {
621 // get the fs DataInputStream for the underlying file
622 // look up the index.
623 Path p = makeQualified(f);
624 Path harPath = getPathInHar(p);
625 if (harPath == null) {
626 throw new IOException("Invalid file name: " + f + " in " + uri);
627 }
628 HarStatus hstatus = metadata.archive.get(harPath);
629 if (hstatus == null) {
630 throw new FileNotFoundException("File: " + f + " does not exist in " + uri);
631 }
632 return hstatus;
633 }
634
635 /**
636 * @return null since no checksum algorithm is implemented.
637 */
638 public FileChecksum getFileChecksum(Path f) {
639 return null;
640 }
641
642 /**
643 * Returns a har input stream which fakes end of
644 * file. It reads the index files to get the part
645 * file name and the size and start of the file.
646 */
647 @Override
648 public FSDataInputStream open(Path f, int bufferSize) throws IOException {
649 // get the fs DataInputStream for the underlying file
650 HarStatus hstatus = getFileHarStatus(f);
651 // we got it.. woo hooo!!!
652 if (hstatus.isDir()) {
653 throw new FileNotFoundException(f + " : not a file in " +
654 archivePath);
655 }
656 return new HarFSDataInputStream(fs, new Path(archivePath,
657 hstatus.getPartName()),
658 hstatus.getStartIndex(), hstatus.getLength(), bufferSize);
659 }
660
661 /*
662 * create throws an exception in Har filesystem.
663 * The archive once created cannot be changed.
664 */
665 public FSDataOutputStream create(Path f, int bufferSize)
666 throws IOException {
667 throw new IOException("Har: Create not allowed");
668 }
669
670 public FSDataOutputStream create(Path f,
671 FsPermission permission,
672 boolean overwrite,
673 int bufferSize,
674 short replication,
675 long blockSize,
676 Progressable progress) throws IOException {
677 throw new IOException("Har: create not allowed.");
678 }
679
680 @Override
681 public void close() throws IOException {
682 if (fs != null) {
683 try {
684 fs.close();
685 } catch(IOException ie) {
686 //this might already be closed
687 // ignore
688 }
689 }
690 }
691
692 /**
693 * Not implemented.
694 */
695 @Override
696 public boolean setReplication(Path src, short replication) throws IOException{
697 throw new IOException("Har: setreplication not allowed");
698 }
699
700 /**
701 * Not implemented.
702 */
703 @Override
704 public boolean delete(Path f, boolean recursive) throws IOException {
705 throw new IOException("Har: delete not allowed");
706 }
707
708 /**
709 * liststatus returns the children of a directory
710 * after looking up the index files.
711 */
712 @Override
713 public FileStatus[] listStatus(Path f) throws IOException {
    // look the path up in the archive index and
    // build fake filestatuses for its children to
    // return to the client
718 List<FileStatus> statuses = new ArrayList<FileStatus>();
719 Path tmpPath = makeQualified(f);
720 Path harPath = getPathInHar(tmpPath);
721 HarStatus hstatus = metadata.archive.get(harPath);
722 if (hstatus == null) {
723 throw new FileNotFoundException("File " + f + " not found in " + archivePath);
724 }
725 if (hstatus.isDir()) {
726 fileStatusesInIndex(hstatus, statuses, hstatus.children);
727 } else {
728 statuses.add(toFileStatus(hstatus, null));
729 }
730
731 return statuses.toArray(new FileStatus[statuses.size()]);
732 }
733
734 /**
735 * return the top level archive path.
736 */
737 public Path getHomeDirectory() {
738 return new Path(uri.toString());
739 }
740
741 public void setWorkingDirectory(Path newDir) {
742 //does nothing.
743 }
744
745 /**
746 * not implemented.
747 */
748 public boolean mkdirs(Path f, FsPermission permission) throws IOException {
749 throw new IOException("Har: mkdirs not allowed");
750 }
751
752 /**
753 * not implemented.
754 */
755 public void copyFromLocalFile(boolean delSrc, Path src, Path dst) throws
756 IOException {
757 throw new IOException("Har: copyfromlocalfile not allowed");
758 }
759
760 /**
761 * copies the file in the har filesystem to a local file.
762 */
763 public void copyToLocalFile(boolean delSrc, Path src, Path dst)
764 throws IOException {
765 FileUtil.copy(this, src, getLocal(getConf()), dst, false, getConf());
766 }
767
768 /**
769 * not implemented.
770 */
771 public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
772 throws IOException {
773 throw new IOException("Har: startLocalOutput not allowed");
774 }
775
776 /**
777 * not implemented.
778 */
779 public void completeLocalOutput(Path fsOutputFile, Path tmpLocalFile)
780 throws IOException {
781 throw new IOException("Har: completeLocalOutput not allowed");
782 }
783
784 /**
785 * not implemented.
786 */
787 public void setOwner(Path p, String username, String groupname)
788 throws IOException {
789 throw new IOException("Har: setowner not allowed");
790 }
791
792 /**
793 * Not implemented.
794 */
795 public void setPermission(Path p, FsPermission permisssion)
796 throws IOException {
797 throw new IOException("Har: setPermission not allowed");
798 }
799
800 /**
801 * Hadoop archives input stream. This input stream fakes EOF
802 * since archive files are part of bigger part files.
803 */
804 private static class HarFSDataInputStream extends FSDataInputStream {
805 /**
806 * Create an input stream that fakes all the reads/positions/seeking.
807 */
808 private static class HarFsInputStream extends FSInputStream {
809 private long position, start, end;
810 //The underlying data input stream that the
811 // underlying filesystem will return.
812 private FSDataInputStream underLyingStream;
813 //one byte buffer
814 private byte[] oneBytebuff = new byte[1];
815 HarFsInputStream(FileSystem fs, Path path, long start,
816 long length, int bufferSize) throws IOException {
817 underLyingStream = fs.open(path, bufferSize);
818 underLyingStream.seek(start);
819 // the start of this file in the part file
820 this.start = start;
821 // the position pointer in the part file
822 this.position = start;
823 // the end pointer in the part file
824 this.end = start + length;
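        // For example (hypothetical values): an archived file stored at
        // bytes [4096, 5120) of the part file gives start = 4096,
        // end = 5120, and getPos() then reports 0..1023 relative to the
        // archived file.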
825 }
826
827 public synchronized int available() throws IOException {
828 long remaining = end - underLyingStream.getPos();
829 if (remaining > (long)Integer.MAX_VALUE) {
830 return Integer.MAX_VALUE;
831 }
832 return (int) remaining;
833 }
834
835 public synchronized void close() throws IOException {
836 underLyingStream.close();
837 super.close();
838 }
839
840 //not implemented
841 @Override
842 public void mark(int readLimit) {
843 // do nothing
844 }
845
846 /**
847 * reset is not implemented
848 */
849 public void reset() throws IOException {
850 throw new IOException("reset not implemented.");
851 }
852
853 public synchronized int read() throws IOException {
854 int ret = read(oneBytebuff, 0, 1);
855 return (ret <= 0) ? -1: (oneBytebuff[0] & 0xff);
856 }
857
      public synchronized int read(byte[] b) throws IOException {
        // read(byte[], int, int) already advances the position pointer,
        // so do not advance it a second time here
        return read(b, 0, b.length);
      }
865
      /**
       * Read up to len bytes, but never past the end of the archived file.
       */
      public synchronized int read(byte[] b, int offset, int len)
        throws IOException {
        int newlen = len;
        int ret = -1;
        // clamp the read so it does not run past the end of this file
        if (position + len > end) {
          newlen = (int) (end - position);
        }
        // nothing left to read: signal EOF
        if (newlen == 0)
          return ret;
        ret = underLyingStream.read(b, offset, newlen);
        // only advance on a successful read; -1 means EOF
        if (ret > 0) {
          position += ret;
        }
        return ret;
      }
883
884 public synchronized long skip(long n) throws IOException {
885 long tmpN = n;
886 if (tmpN > 0) {
887 if (position + tmpN > end) {
888 tmpN = end - position;
889 }
890 underLyingStream.seek(tmpN + position);
891 position += tmpN;
892 return tmpN;
893 }
894 return (tmpN < 0)? -1 : 0;
895 }
896
897 public synchronized long getPos() throws IOException {
898 return (position - start);
899 }
900
901 public synchronized void seek(long pos) throws IOException {
902 if (pos < 0 || (start + pos > end)) {
903 throw new IOException("Failed to seek: EOF");
904 }
905 position = start + pos;
906 underLyingStream.seek(position);
907 }
908
909 public boolean seekToNewSource(long targetPos) throws IOException {
910 //do not need to implement this
911 // hdfs in itself does seektonewsource
912 // while reading.
913 return false;
914 }
915
916 /**
917 * implementing position readable.
918 */
919 public int read(long pos, byte[] b, int offset, int length)
920 throws IOException {
921 int nlength = length;
922 if (start + nlength + pos > end) {
923 nlength = (int) (end - (start + pos));
924 }
925 return underLyingStream.read(pos + start , b, offset, nlength);
926 }
927
928 /**
929 * position readable again.
930 */
931 public void readFully(long pos, byte[] b, int offset, int length)
932 throws IOException {
933 if (start + length + pos > end) {
934 throw new IOException("Not enough bytes to read.");
935 }
936 underLyingStream.readFully(pos + start, b, offset, length);
937 }
938
939 public void readFully(long pos, byte[] b) throws IOException {
940 readFully(pos, b, 0, b.length);
941 }
942
943 }
944
945 /**
     * Constructor for har input stream.
947 * @param fs the underlying filesystem
948 * @param p The path in the underlying filesystem
949 * @param start the start position in the part file
950 * @param length the length of valid data in the part file
951 * @param bufsize the buffer size
952 * @throws IOException
953 */
954 public HarFSDataInputStream(FileSystem fs, Path p, long start,
955 long length, int bufsize) throws IOException {
956 super(new HarFsInputStream(fs, p, start, length, bufsize));
957 }
958
959 /**
960 * constructor for har input stream.
961 * @param fs the underlying filesystem
962 * @param p the path in the underlying file system
963 * @param start the start position in the part file
964 * @param length the length of valid data in the part file.
965 * @throws IOException
966 */
967 public HarFSDataInputStream(FileSystem fs, Path p, long start, long length)
968 throws IOException {
969 super(new HarFsInputStream(fs, p, start, length, 0));
970 }
971 }
972
973 private class HarMetaData {
974 private FileSystem fs;
975 private int version;
976 // the masterIndex of the archive
977 private Path masterIndexPath;
978 // the index file
979 private Path archiveIndexPath;
980
981 private long masterIndexTimestamp;
982 private long archiveIndexTimestamp;
983
984 List<Store> stores = new ArrayList<Store>();
985 Map<Path, HarStatus> archive = new HashMap<Path, HarStatus>();
986 private Map<Path, FileStatus> partFileStatuses = new HashMap<Path, FileStatus>();
987
988 public HarMetaData(FileSystem fs, Path masterIndexPath, Path archiveIndexPath) {
989 this.fs = fs;
990 this.masterIndexPath = masterIndexPath;
991 this.archiveIndexPath = archiveIndexPath;
992 }
993
994 public FileStatus getPartFileStatus(Path partPath) throws IOException {
995 FileStatus status;
996 status = partFileStatuses.get(partPath);
997 if (status == null) {
998 status = fs.getFileStatus(partPath);
999 partFileStatuses.put(partPath, status);
1000 }
1001 return status;
1002 }
1003
1004 public long getMasterIndexTimestamp() {
1005 return masterIndexTimestamp;
1006 }
1007
1008 public long getArchiveIndexTimestamp() {
1009 return archiveIndexTimestamp;
1010 }
1011
1012 private int getVersion() {
1013 return version;
1014 }
1015
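    // The _masterindex is line oriented: the first line carries the archive
    // version, and every following line is
    //   <startHash> <endHash> <beginOffset> <endOffset>
    // where the two offsets delimit the byte range of the _index file that
    // holds the entries whose path hashes fall in [startHash, endHash].
    // A hypothetical example:
    //   3
    //   0 2147483647 0 2353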
1016 private void parseMetaData() throws IOException {
1017 FSDataInputStream in = fs.open(masterIndexPath);
1018 FileStatus masterStat = fs.getFileStatus(masterIndexPath);
1019 masterIndexTimestamp = masterStat.getModificationTime();
1020 LineReader lin = new LineReader(in, getConf());
1021 Text line = new Text();
1022 long read = lin.readLine(line);
1023
1024 // the first line contains the version of the index file
1025 String versionLine = line.toString();
1026 String[] arr = versionLine.split(" ");
1027 version = Integer.parseInt(arr[0]);
1028 // make it always backwards-compatible
1029 if (this.version > HarFileSystem.VERSION) {
1030 throw new IOException("Invalid version " +
1031 this.version + " expected " + HarFileSystem.VERSION);
1032 }
1033
1034 // each line contains a hashcode range and the index file name
1035 String[] readStr = null;
1036 while(read < masterStat.getLen()) {
1037 int b = lin.readLine(line);
1038 read += b;
1039 readStr = line.toString().split(" ");
1040 int startHash = Integer.parseInt(readStr[0]);
1041 int endHash = Integer.parseInt(readStr[1]);
1042 stores.add(new Store(Long.parseLong(readStr[2]),
1043 Long.parseLong(readStr[3]), startHash,
1044 endHash));
1045 line.clear();
1046 }
1047 try {
1048 // close the master index
1049 lin.close();
1050 } catch(IOException io){
1051 // do nothing just a read.
1052 }
1053
1054 FSDataInputStream aIn = fs.open(archiveIndexPath);
1055 FileStatus archiveStat = fs.getFileStatus(archiveIndexPath);
1056 archiveIndexTimestamp = archiveStat.getModificationTime();
1057 LineReader aLin;
1058 String retStr = null;
1059 // now start reading the real index file
1060 for (Store s: stores) {
1061 read = 0;
1062 aIn.seek(s.begin);
1063 aLin = new LineReader(aIn, getConf());
1064 while (read + s.begin < s.end) {
1065 int tmp = aLin.readLine(line);
1066 read += tmp;
1067 String lineFeed = line.toString();
1068 String[] parsed = lineFeed.split(" ");
1069 parsed[0] = decodeFileName(parsed[0]);
1070 archive.put(new Path(parsed[0]), new HarStatus(lineFeed));
1071 line.clear();
1072 }
1073 }
1074 try {
1075 // close the archive index
1076 aIn.close();
1077 } catch(IOException io) {
1078 // do nothing just a read.
1079 }
1080 }
1081 }
1082 }