/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.io;

import java.io.IOException;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.DataInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ByteArrayInputStream;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterInputStream;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

/** A base-class for Writables which store themselves compressed and lazily
 * inflate on field access.  This is useful for large objects whose fields are
 * not altered during a map or reduce operation: leaving the field data
 * compressed makes copying the instance from one file to another much
 * faster.
 *
 * <p>The serialized form is a 4-byte length (written with
 * {@link DataOutput#writeInt(int)}) followed by that many bytes of
 * deflate-compressed field data. */
@InterfaceAudience.Public
@InterfaceStability.Stable
public abstract class CompressedWritable implements Writable {
  // if non-null, the compressed field data of this instance.
  private byte[] compressed;

  public CompressedWritable() {}

  /** Reads the length-prefixed compressed field data without inflating it.
   * The fields themselves are only decoded later, by
   * {@link #ensureInflated()}.
   *
   * @param in the input to read the length and compressed bytes from
   * @throws IOException if the length prefix is negative, or if the
   *         underlying input fails or ends before all bytes are read
   */
  @Override
  public final void readFields(DataInput in) throws IOException {
    int length = in.readInt();
    if (length < 0) {
      // A negative length can only come from a corrupt or foreign stream;
      // report it as an I/O problem rather than NegativeArraySizeException.
      throw new IOException("Invalid compressed data length: " + length);
    }
    // Read into a local first so that a failed or truncated read does not
    // leave a half-filled buffer behind for ensureInflated() to decode.
    byte[] data = new byte[length];
    in.readFully(data, 0, length);
    compressed = data;
  }

  /** Must be called by all methods which access fields to ensure that the data
   * has been uncompressed.  Does nothing when there is no pending compressed
   * data.
   *
   * @throws RuntimeException wrapping any {@link IOException} raised while
   *         inflating or while {@link #readFieldsCompressed(DataInput)} reads
   */
  protected void ensureInflated() {
    if (compressed != null) {
      try {
        InflaterInputStream inflaterStream =
          new InflaterInputStream(new ByteArrayInputStream(compressed));
        try {
          readFieldsCompressed(new DataInputStream(inflaterStream));
          // Only drop the compressed bytes once the fields were read
          // successfully.
          compressed = null;
        } finally {
          // The stream created its own Inflater; closing the stream is what
          // releases it, on both the success and the failure path.
          inflaterStream.close();
        }
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  }

  /** Subclasses implement this instead of {@link #readFields(DataInput)}.
   *
   * @param in input that yields the uncompressed field data
   * @throws IOException if the fields cannot be read
   */
  protected abstract void readFieldsCompressed(DataInput in)
    throws IOException;

  /** Writes this instance as a 4-byte length followed by the compressed field
   * data.  If compressed data is already held it is written as-is; otherwise
   * the fields are compressed via {@link #writeCompressed(DataOutput)} first
   * and the result is kept for subsequent calls.
   *
   * @param out the output to write the length and compressed bytes to
   * @throws IOException if compressing the fields or writing to
   *         <code>out</code> fails
   */
  @Override
  public final void write(DataOutput out) throws IOException {
    if (compressed == null) {
      ByteArrayOutputStream deflated = new ByteArrayOutputStream();
      Deflater deflater = new Deflater(Deflater.BEST_SPEED);
      try {
        DataOutputStream dout =
          new DataOutputStream(new DeflaterOutputStream(deflated, deflater));
        writeCompressed(dout);
        // Closing finishes the deflate stream and flushes it into 'deflated'.
        dout.close();
      } finally {
        // The Deflater was supplied explicitly, so the stream does not end
        // it on close; end it here even when writeCompressed() throws.
        deflater.end();
      }
      compressed = deflated.toByteArray();
    }
    out.writeInt(compressed.length);
    out.write(compressed);
  }

  /** Subclasses implement this instead of {@link #write(DataOutput)}.
   *
   * @param out output that receives the uncompressed field data
   * @throws IOException if the fields cannot be written
   */
  protected abstract void writeCompressed(DataOutput out) throws IOException;

}