kafka.etl
Class KafkaETLInputFormat

java.lang.Object
  extended by org.apache.hadoop.mapred.FileInputFormat<KafkaETLKey,org.apache.hadoop.io.BytesWritable>
      extended by org.apache.hadoop.mapred.SequenceFileInputFormat<KafkaETLKey,org.apache.hadoop.io.BytesWritable>
          extended by kafka.etl.KafkaETLInputFormat
All Implemented Interfaces:
org.apache.hadoop.mapred.InputFormat<KafkaETLKey,org.apache.hadoop.io.BytesWritable>

public class KafkaETLInputFormat
extends org.apache.hadoop.mapred.SequenceFileInputFormat<KafkaETLKey,org.apache.hadoop.io.BytesWritable>
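
KafkaETLInputFormat is an old-style (org.apache.hadoop.mapred) input format for the Kafka ETL contrib job: it delivers KafkaETLKey/BytesWritable records to the mappers and, judging from its fields, pulls message data from Kafka brokers through a kafka.consumer.SimpleConsumer. As an illustration only (the driver class, job name, and paths below are hypothetical and not part of this API), a minimal driver might wire the format into a job like this:

 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.FileOutputFormat;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;

 import kafka.etl.KafkaETLInputFormat;
 import kafka.etl.KafkaETLKey;

 public class KafkaETLDriver {
     public static void main(String[] args) throws Exception {
         JobConf conf = new JobConf(KafkaETLDriver.class);
         conf.setJobName("kafka-etl-example");            // hypothetical job name

         // Use KafkaETLInputFormat so mappers receive KafkaETLKey / BytesWritable pairs.
         conf.setInputFormat(KafkaETLInputFormat.class);
         conf.setMapOutputKeyClass(KafkaETLKey.class);
         conf.setMapOutputValueClass(BytesWritable.class);

         // Hypothetical paths; the input is expected to be SequenceFile data
         // prepared for the ETL job.
         FileInputFormat.setInputPaths(conf, new Path("/kafka-etl/input"));
         FileOutputFormat.setOutputPath(conf, new Path("/kafka-etl/output"));

         JobClient.runJob(conf);
     }
 }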


Field Summary
protected  int _bufferSize
protected  kafka.consumer.SimpleConsumer _consumer
protected  long _count
protected  boolean _ignoreErrors
protected  org.apache.hadoop.mapred.lib.MultipleOutputs _mos
protected  int _nodeId
protected  java.util.Map<java.lang.Integer,java.net.URI> _nodes
protected  long _offset
protected  org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable> _offsetOut
protected  long[] _offsetRange
protected  int _partition
protected  Props _props
protected  int _retry
protected  int _soTimeout
protected  long _startOffset
protected  long _timestamp
protected  boolean _toContinue
protected  java.lang.String _topic

Fields inherited from class org.apache.hadoop.mapred.FileInputFormat
LOG
 
Constructor Summary
KafkaETLInputFormat()

Method Summary
 org.apache.hadoop.mapred.RecordReader<KafkaETLKey,org.apache.hadoop.io.BytesWritable> getRecordReader(org.apache.hadoop.mapred.InputSplit split, org.apache.hadoop.mapred.JobConf job, org.apache.hadoop.mapred.Reporter reporter)
 org.apache.hadoop.mapred.InputSplit[] getSplits(org.apache.hadoop.mapred.JobConf conf, int numSplits)
protected  boolean isSplitable(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path file)

Methods inherited from class org.apache.hadoop.mapred.SequenceFileInputFormat
listStatus
 
Methods inherited from class org.apache.hadoop.mapred.FileInputFormat
addInputPath, addInputPaths, computeSplitSize, getBlockIndex, getInputPathFilter, getInputPaths, getSplitHosts, setInputPathFilter, setInputPaths, setInputPaths, setMinSplitSize
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

_props

protected Props _props

_bufferSize

protected int _bufferSize

_soTimeout

protected int _soTimeout

_nodes

protected java.util.Map<java.lang.Integer,java.net.URI> _nodes

_partition

protected int _partition

_nodeId

protected int _nodeId

_topic

protected java.lang.String _topic

_consumer

protected kafka.consumer.SimpleConsumer _consumer

_mos

protected org.apache.hadoop.mapred.lib.MultipleOutputs _mos

_offsetOut

protected org.apache.hadoop.mapred.OutputCollector<org.apache.hadoop.io.BytesWritable,org.apache.hadoop.io.BytesWritable> _offsetOut

_offsetRange

protected long[] _offsetRange

_startOffset

protected long _startOffset

_offset

protected long _offset

_toContinue

protected boolean _toContinue

_retry

protected int _retry

_timestamp

protected long _timestamp

_count

protected long _count

_ignoreErrors

protected boolean _ignoreErrors

Constructor Detail

KafkaETLInputFormat

public KafkaETLInputFormat()

Method Detail

getRecordReader

public org.apache.hadoop.mapred.RecordReader<KafkaETLKey,org.apache.hadoop.io.BytesWritable> getRecordReader(org.apache.hadoop.mapred.InputSplit split,
                                                                                                             org.apache.hadoop.mapred.JobConf job,
                                                                                                             org.apache.hadoop.mapred.Reporter reporter)
                                                                                                      throws java.io.IOException
Specified by:
getRecordReader in interface org.apache.hadoop.mapred.InputFormat<KafkaETLKey,org.apache.hadoop.io.BytesWritable>
Overrides:
getRecordReader in class org.apache.hadoop.mapred.SequenceFileInputFormat<KafkaETLKey,org.apache.hadoop.io.BytesWritable>
Throws:
java.io.IOException
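
As a usage sketch only (assuming the split and JobConf come from getSplits(...) and a driver such as the one sketched in the class description; Reporter.NULL is the standard no-op reporter from the mapred API):

 KafkaETLInputFormat format = new KafkaETLInputFormat();
 org.apache.hadoop.mapred.RecordReader<KafkaETLKey, org.apache.hadoop.io.BytesWritable> reader =
         format.getRecordReader(split, job, org.apache.hadoop.mapred.Reporter.NULL);

 KafkaETLKey key = reader.createKey();
 org.apache.hadoop.io.BytesWritable value = reader.createValue();
 while (reader.next(key, value)) {
     // value presumably carries the raw message bytes fetched from Kafka
 }
 reader.close();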

isSplitable

protected boolean isSplitable(org.apache.hadoop.fs.FileSystem fs,
                              org.apache.hadoop.fs.Path file)
Overrides:
isSplitable in class org.apache.hadoop.mapred.FileInputFormat<KafkaETLKey,org.apache.hadoop.io.BytesWritable>

getSplits

public org.apache.hadoop.mapred.InputSplit[] getSplits(org.apache.hadoop.mapred.JobConf conf,
                                                       int numSplits)
                                                throws java.io.IOException
Specified by:
getSplits in interface org.apache.hadoop.mapred.InputFormat<KafkaETLKey,org.apache.hadoop.io.BytesWritable>
Overrides:
getSplits in class org.apache.hadoop.mapred.FileInputFormat<KafkaETLKey,org.apache.hadoop.io.BytesWritable>
Throws:
java.io.IOException
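
As a usage sketch only (conf is a JobConf configured as in the class-description example; the split count of 10 is arbitrary, and the framework normally calls this method itself):

 KafkaETLInputFormat format = new KafkaETLInputFormat();
 org.apache.hadoop.mapred.InputSplit[] splits = format.getSplits(conf, 10);
 for (org.apache.hadoop.mapred.InputSplit split : splits) {
     // both getLength() and getLocations() may throw IOException
     System.out.println(split.getLength() + " bytes, locations: "
             + java.util.Arrays.toString(split.getLocations()));
 }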