org.apache.hadoop.mapred
Class Task

java.lang.Object
  extended by org.apache.hadoop.mapred.Task
All Implemented Interfaces:
org.apache.hadoop.conf.Configurable, org.apache.hadoop.io.Writable
Direct Known Subclasses:
ReduceTask

@InterfaceAudience.LimitedPrivate(value="MapReduce")
@InterfaceStability.Unstable
public abstract class Task
extends Object
implements org.apache.hadoop.io.Writable, org.apache.hadoop.conf.Configurable

Base class for tasks.


Nested Class Summary
static class Task.CombineOutputCollector<K,V>
          OutputCollector for the combiner.
protected static class Task.CombinerRunner<K,V>
           
static class Task.CombineValuesIterator<KEY,VALUE>
          Iterator to return combined values.
static class Task.Counter
          Deprecated. Provided for compatibility. Use TaskCounter instead.
protected static class Task.NewCombinerRunner<K,V>
           
protected static class Task.OldCombinerRunner<K,V>
           
protected  class Task.TaskReporter
           
 
Field Summary
protected  OutputCommitter committer
           
protected  JobConf conf
           
static long DEFAULT_COMBINE_RECORDS_BEFORE_PROGRESS
           
protected  org.apache.hadoop.io.BytesWritable extraData
           
protected  Counters.Counter failedShuffleCounter
           
protected static String FILESYSTEM_COUNTER_GROUP
          Name of the FileSystem counters' group
protected  org.apache.hadoop.mapred.Task.GcTimeUpdater gcUpdater
           
protected  boolean jobCleanup
           
protected  JobContext jobContext
           
protected  JobStatus.State jobRunStateForCleanup
           
protected  boolean jobSetup
           
protected  org.apache.hadoop.fs.LocalDirAllocator lDirAlloc
           
protected  MapOutputFile mapOutputFile
           
static String MERGED_OUTPUT_PREFIX
           
protected  Counters.Counter mergedMapOutputsCounter
           
protected  OutputFormat<?,?> outputFormat
           
static int PROGRESS_INTERVAL
          The number of milliseconds between progress reports.
protected  Counters.Counter spilledRecordsCounter
           
protected  boolean taskCleanup
           
protected  TaskAttemptContext taskContext
           
protected  SecretKey tokenSecret
           
protected  TaskUmbilicalProtocol umbilical
           
 
Constructor Summary
Task()
           
Task(String jobFile, TaskAttemptID taskId, int partition, int numSlotsRequired)
           
 
Method Summary
protected static <INKEY,INVALUE,OUTKEY,OUTVALUE> Reducer.Context createReduceContext(Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE> reducer, org.apache.hadoop.conf.Configuration job, TaskAttemptID taskId, RawKeyValueIterator rIter, Counter inputKeyCounter, Counter inputValueCounter, RecordWriter<OUTKEY,OUTVALUE> output, OutputCommitter committer, StatusReporter reporter, org.apache.hadoop.io.RawComparator<INKEY> comparator, Class<INKEY> keyClass, Class<INVALUE> valueClass)
           
 void done(TaskUmbilicalProtocol umbilical, Task.TaskReporter reporter)
           
 org.apache.hadoop.conf.Configuration getConf()
           
protected static String[] getFileSystemCounterNames(String uriScheme)
          Counters to measure the usage of the different file systems.
protected static List<org.apache.hadoop.fs.FileSystem.Statistics> getFsStatistics(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration conf)
          Gets a handle to the Statistics instances based on the scheme associated with path.
 String getJobFile()
           
 JobID getJobID()
          Get the JobID for this task.
 SecretKey getJobTokenSecret()
          Get the job token secret.
 MapOutputFile getMapOutputFile()
           
 int getNumSlotsRequired()
           
 int getPartition()
          Get the index of this task within the job.
 TaskStatus.Phase getPhase()
          Return current phase of the task.
 org.apache.hadoop.util.Progress getProgress()
           
 org.apache.hadoop.mapred.SortedRanges getSkipRanges()
          Get skipRanges.
 TaskAttemptID getTaskID()
           
 void initialize(JobConf job, JobID id, Reporter reporter, boolean useNewApi)
           
abstract  boolean isMapTask()
           
 boolean isSkipping()
          Whether the task is in skipping mode.
protected  boolean keepTaskFiles(JobConf conf)
           
 void localizeConfiguration(JobConf conf)
          Localize the given JobConf to be specific for this task.
 void readFields(DataInput in)
           
protected  void reportFatalError(TaskAttemptID id, Throwable throwable, String logMsg)
          Report a fatal error to the parent (task) tracker.
protected  void reportNextRecordRange(TaskUmbilicalProtocol umbilical, long nextRecIndex)
          Reports the next executing record range to the TaskTracker.
abstract  void run(JobConf job, TaskUmbilicalProtocol umbilical)
          Run this task as part of the named job.
protected  void runJobCleanupTask(TaskUmbilicalProtocol umbilical, Task.TaskReporter reporter)
           
protected  void runJobSetupTask(TaskUmbilicalProtocol umbilical, Task.TaskReporter reporter)
           
protected  void runTaskCleanupTask(TaskUmbilicalProtocol umbilical, Task.TaskReporter reporter)
           
 void setConf(org.apache.hadoop.conf.Configuration conf)
           
 void setJobFile(String jobFile)
           
 void setJobTokenSecret(SecretKey tokenSecret)
          Set the job token secret.
protected  void setPhase(TaskStatus.Phase phase)
          Set current phase of the task.
 void setSkipping(boolean skipping)
          Sets whether to run the task in skipping mode.
 void setSkipRanges(org.apache.hadoop.mapred.SortedRanges skipRanges)
          Set skipRanges.
protected  void setWriteSkipRecs(boolean writeSkipRecs)
          Set whether to write skip records.
 void statusUpdate(TaskUmbilicalProtocol umbilical)
          Send a status update to the task tracker.
 String toString()
           
protected  boolean toWriteSkipRecs()
          Get whether to write skip records.
 void write(DataOutput out)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
 

Field Detail

MERGED_OUTPUT_PREFIX

public static String MERGED_OUTPUT_PREFIX

DEFAULT_COMBINE_RECORDS_BEFORE_PROGRESS

public static final long DEFAULT_COMBINE_RECORDS_BEFORE_PROGRESS
See Also:
Constant Field Values

FILESYSTEM_COUNTER_GROUP

protected static final String FILESYSTEM_COUNTER_GROUP
Name of the FileSystem counters' group

See Also:
Constant Field Values

jobRunStateForCleanup

protected JobStatus.State jobRunStateForCleanup

jobCleanup

protected boolean jobCleanup

jobSetup

protected boolean jobSetup

taskCleanup

protected boolean taskCleanup

extraData

protected org.apache.hadoop.io.BytesWritable extraData

conf

protected JobConf conf

mapOutputFile

protected MapOutputFile mapOutputFile

lDirAlloc

protected org.apache.hadoop.fs.LocalDirAllocator lDirAlloc

jobContext

protected JobContext jobContext

taskContext

protected TaskAttemptContext taskContext

outputFormat

protected OutputFormat<?,?> outputFormat

committer

protected OutputCommitter committer

spilledRecordsCounter

protected final Counters.Counter spilledRecordsCounter

failedShuffleCounter

protected final Counters.Counter failedShuffleCounter

mergedMapOutputsCounter

protected final Counters.Counter mergedMapOutputsCounter

umbilical

protected TaskUmbilicalProtocol umbilical

tokenSecret

protected SecretKey tokenSecret

gcUpdater

protected org.apache.hadoop.mapred.Task.GcTimeUpdater gcUpdater

PROGRESS_INTERVAL

public static final int PROGRESS_INTERVAL
The number of milliseconds between progress reports.

See Also:
Constant Field Values
Constructor Detail

Task

public Task()

Task

public Task(String jobFile,
            TaskAttemptID taskId,
            int partition,
            int numSlotsRequired)
Method Detail

getFileSystemCounterNames

protected static String[] getFileSystemCounterNames(String uriScheme)
Counters to measure the usage of the different file systems. Always returns a String array with two elements: the first element is the name of the BYTES_READ counter, and the second is the name of the BYTES_WRITTEN counter.
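
A brief illustrative sketch, not part of the generated documentation: since the method is protected static, it is visible to Task subclasses. The exact counter-name strings are derived from the scheme and are treated here as opaque.

    // Inside a hypothetical Task subclass.
    String[] names = getFileSystemCounterNames("hdfs");
    String bytesReadName = names[0];     // BYTES_READ counter name for the "hdfs" scheme
    String bytesWrittenName = names[1];  // BYTES_WRITTEN counter name for the "hdfs" scheme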


setJobFile

public void setJobFile(String jobFile)

getJobFile

public String getJobFile()

getTaskID

public TaskAttemptID getTaskID()

getNumSlotsRequired

public int getNumSlotsRequired()

getJobID

public JobID getJobID()
Get the JobID for this task.

Returns:
the JobID

setJobTokenSecret

public void setJobTokenSecret(SecretKey tokenSecret)
Set the job token secret.

Parameters:
tokenSecret - the secret

getJobTokenSecret

public SecretKey getJobTokenSecret()
Get the job token secret.

Returns:
the token secret

getPartition

public int getPartition()
Get the index of this task within the job.

Returns:
the integer part of the task id

getPhase

public TaskStatus.Phase getPhase()
Return the current phase of the task. Needs to be synchronized, as the communication thread sends the phase every second.

Returns:
the current phase of the task

setPhase

protected void setPhase(TaskStatus.Phase phase)
Set current phase of the task.

Parameters:
phase - task phase

toWriteSkipRecs

protected boolean toWriteSkipRecs()
Get whether to write skip records.


setWriteSkipRecs

protected void setWriteSkipRecs(boolean writeSkipRecs)
Set whether to write skip records.


reportFatalError

protected void reportFatalError(TaskAttemptID id,
                                Throwable throwable,
                                String logMsg)
Report a fatal error to the parent (task) tracker.


getFsStatistics

protected static List<org.apache.hadoop.fs.FileSystem.Statistics> getFsStatistics(org.apache.hadoop.fs.Path path,
                                                                                  org.apache.hadoop.conf.Configuration conf)
                                                                           throws IOException
Gets a handle to the Statistics instances based on the scheme associated with path.

Parameters:
path - the path.
conf - the configuration to extract the scheme from if not part of the path.
Returns:
the matching Statistics instances, or null if none are found for the scheme.
Throws:
IOException
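
For illustration, a hedged sketch of how a Task subclass might use this lookup, assuming the usual imports (java.util.List, org.apache.hadoop.fs.Path, org.apache.hadoop.fs.FileSystem); the output path is hypothetical, and getConf() supplies the task's configuration.

    // Inside a hypothetical Task subclass (getFsStatistics is protected static).
    Path out = new Path("hdfs://namenode/user/out");   // hypothetical path
    List<FileSystem.Statistics> matched = getFsStatistics(out, getConf());
    if (matched != null) {
      for (FileSystem.Statistics stats : matched) {
        long written = stats.getBytesWritten();        // bytes written so far via this scheme
      }
    }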

getSkipRanges

public org.apache.hadoop.mapred.SortedRanges getSkipRanges()
Get skipRanges.


setSkipRanges

public void setSkipRanges(org.apache.hadoop.mapred.SortedRanges skipRanges)
Set skipRanges.


isSkipping

public boolean isSkipping()
Whether the task is in skipping mode.


setSkipping

public void setSkipping(boolean skipping)
Sets whether to run the task in skipping mode.

Parameters:
skipping -
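
A hedged sketch of enabling skip mode. SortedRanges is package-private, so this presumes code living in the org.apache.hadoop.mapred package; task is any Task instance.

    SortedRanges ranges = new SortedRanges();  // empty set: no records marked for skipping yet
    task.setSkipRanges(ranges);
    task.setSkipping(true);                    // run the task in skipping mode
    boolean skipping = task.isSkipping();      // now true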

write

public void write(DataOutput out)
           throws IOException
Specified by:
write in interface org.apache.hadoop.io.Writable
Throws:
IOException

readFields

public void readFields(DataInput in)
                throws IOException
Specified by:
readFields in interface org.apache.hadoop.io.Writable
Throws:
IOException
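
Because Task implements Writable, instances round-trip through the standard Writable machinery. A hedged sketch, assuming the usual java.io imports; ReduceTask stands in for any concrete subclass, since the reader must know the concrete type in advance:

    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    task.write(new DataOutputStream(buf));              // serialize the task state

    Task copy = new ReduceTask();                       // concrete type chosen by the caller
    copy.readFields(new DataInputStream(
        new ByteArrayInputStream(buf.toByteArray())));  // restore the state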

toString

public String toString()
Overrides:
toString in class Object

localizeConfiguration

public void localizeConfiguration(JobConf conf)
                           throws IOException
Localize the given JobConf to be specific for this task.

Throws:
IOException

run

public abstract void run(JobConf job,
                         TaskUmbilicalProtocol umbilical)
                  throws IOException,
                         ClassNotFoundException,
                         InterruptedException
Run this task as part of the named job. This method is executed in the child process, and it is what invokes the user-supplied map, reduce, etc. methods.

Parameters:
umbilical - for progress reports
Throws:
IOException
ClassNotFoundException
InterruptedException
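
A minimal, hypothetical outline of a concrete subclass; real tasks such as ReduceTask do far more in run(), and TaskUmbilicalProtocol is package-private, so this presumes code in the org.apache.hadoop.mapred package:

    // Hypothetical subclass for illustration only.
    public class MyTask extends Task {
      @Override
      public boolean isMapTask() {
        return false;
      }

      @Override
      public void run(JobConf job, TaskUmbilicalProtocol umbilical)
          throws IOException, ClassNotFoundException, InterruptedException {
        // push a progress/status report to the parent through the umbilical
        statusUpdate(umbilical);
        // ... perform the actual work of the task here ...
      }
    }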

isMapTask

public abstract boolean isMapTask()

getProgress

public org.apache.hadoop.util.Progress getProgress()

initialize

public void initialize(JobConf job,
                       JobID id,
                       Reporter reporter,
                       boolean useNewApi)
                throws IOException,
                       ClassNotFoundException,
                       InterruptedException
Throws:
IOException
ClassNotFoundException
InterruptedException

reportNextRecordRange

protected void reportNextRecordRange(TaskUmbilicalProtocol umbilical,
                                     long nextRecIndex)
                              throws IOException
Reports the next executing record range to the TaskTracker.

Parameters:
umbilical -
nextRecIndex - the record index which would be fed next.
Throws:
IOException

done

public void done(TaskUmbilicalProtocol umbilical,
                 Task.TaskReporter reporter)
          throws IOException,
                 InterruptedException
Throws:
IOException
InterruptedException

statusUpdate

public void statusUpdate(TaskUmbilicalProtocol umbilical)
                  throws IOException
Send a status update to the task tracker.

Parameters:
umbilical -
Throws:
IOException

runTaskCleanupTask

protected void runTaskCleanupTask(TaskUmbilicalProtocol umbilical,
                                  Task.TaskReporter reporter)
                           throws IOException,
                                  InterruptedException
Throws:
IOException
InterruptedException

runJobCleanupTask

protected void runJobCleanupTask(TaskUmbilicalProtocol umbilical,
                                 Task.TaskReporter reporter)
                          throws IOException,
                                 InterruptedException
Throws:
IOException
InterruptedException

keepTaskFiles

protected boolean keepTaskFiles(JobConf conf)

runJobSetupTask

protected void runJobSetupTask(TaskUmbilicalProtocol umbilical,
                               Task.TaskReporter reporter)
                        throws IOException,
                               InterruptedException
Throws:
IOException
InterruptedException

setConf

public void setConf(org.apache.hadoop.conf.Configuration conf)
Specified by:
setConf in interface org.apache.hadoop.conf.Configurable

getConf

public org.apache.hadoop.conf.Configuration getConf()
Specified by:
getConf in interface org.apache.hadoop.conf.Configurable
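
Since Task is Configurable, Hadoop's ReflectionUtils injects a Configuration at instantiation time by calling setConf(). A hedged sketch, assuming imports of org.apache.hadoop.conf.Configuration and org.apache.hadoop.util.ReflectionUtils; ReduceTask again stands in for any concrete subclass:

    JobConf job = new JobConf();
    Task t = ReflectionUtils.newInstance(ReduceTask.class, job);  // calls t.setConf(job)
    Configuration back = t.getConf();  // the configuration handed in above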

getMapOutputFile

public MapOutputFile getMapOutputFile()

createReduceContext

protected static <INKEY,INVALUE,OUTKEY,OUTVALUE> Reducer.Context createReduceContext(Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE> reducer,
                                                                                     org.apache.hadoop.conf.Configuration job,
                                                                                     TaskAttemptID taskId,
                                                                                     RawKeyValueIterator rIter,
                                                                                     Counter inputKeyCounter,
                                                                                     Counter inputValueCounter,
                                                                                     RecordWriter<OUTKEY,OUTVALUE> output,
                                                                                     OutputCommitter committer,
                                                                                     StatusReporter reporter,
                                                                                     org.apache.hadoop.io.RawComparator<INKEY> comparator,
                                                                                     Class<INKEY> keyClass,
                                                                                     Class<INVALUE> valueClass)
                                              throws IOException,
                                                     InterruptedException
Throws:
IOException
InterruptedException


Copyright © 2012 Apache Software Foundation. All Rights Reserved.