|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||
java.lang.Object
org.apache.hadoop.mapreduce.OutputFormat<org.apache.hadoop.io.Text,Mutation>
org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat
public class AccumuloOutputFormat
This class allows MapReduce jobs to use Accumulo as the sink for data. This OutputFormat accepts keys and values of type Text (for a table
name) and Mutation from the Map and Reduce functions.
The user must specify the following via static configurator methods:
setConnectorInfo(Job, String, AuthenticationToken)
setZooKeeperInstance(Job, String, String) OR setMockInstance(Job, String)
| Nested Class Summary | |
|---|---|
protected static class |
AccumuloOutputFormat.AccumuloRecordWriter
A base class to be used to create RecordWriter instances that write to Accumulo. |
| Field Summary | |
|---|---|
protected static org.apache.log4j.Logger |
log
|
| Constructor Summary | |
|---|---|
AccumuloOutputFormat()
|
|
| Method Summary | |
|---|---|
protected static boolean |
canCreateTables(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use canCreateTables(JobContext) instead. |
protected static Boolean |
canCreateTables(org.apache.hadoop.mapreduce.JobContext context)
Determines whether tables are permitted to be created as needed. |
void |
checkOutputSpecs(org.apache.hadoop.mapreduce.JobContext job)
|
protected static BatchWriterConfig |
getBatchWriterOptions(org.apache.hadoop.mapreduce.JobContext context)
Gets the BatchWriterConfig settings. |
protected static String |
getDefaultTableName(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use getDefaultTableName(JobContext) instead. |
protected static String |
getDefaultTableName(org.apache.hadoop.mapreduce.JobContext context)
Gets the default table name from the configuration. |
protected static Instance |
getInstance(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use getInstance(JobContext) instead. |
protected static Instance |
getInstance(org.apache.hadoop.mapreduce.JobContext context)
Initializes an Accumulo Instance based on the configuration. |
protected static org.apache.log4j.Level |
getLogLevel(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use getLogLevel(JobContext) instead. |
protected static org.apache.log4j.Level |
getLogLevel(org.apache.hadoop.mapreduce.JobContext context)
Gets the log level from this configuration. |
protected static int |
getMaxLatency(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use getBatchWriterOptions(JobContext) instead. |
protected static long |
getMaxMutationBufferSize(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use getBatchWriterOptions(JobContext) instead. |
protected static int |
getMaxWriteThreads(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use getBatchWriterOptions(JobContext) instead. |
org.apache.hadoop.mapreduce.OutputCommitter |
getOutputCommitter(org.apache.hadoop.mapreduce.TaskAttemptContext context)
|
protected static String |
getPrincipal(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use getToken(JobContext) instead. |
protected static String |
getPrincipal(org.apache.hadoop.mapreduce.JobContext context)
Gets the user name from the configuration. |
org.apache.hadoop.mapreduce.RecordWriter<org.apache.hadoop.io.Text,Mutation> |
getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext attempt)
|
protected static boolean |
getSimulationMode(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use getSimulationMode(JobContext) instead. |
protected static Boolean |
getSimulationMode(org.apache.hadoop.mapreduce.JobContext context)
Determines whether this feature is enabled. |
protected static byte[] |
getToken(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use getToken(JobContext) instead. |
protected static byte[] |
getToken(org.apache.hadoop.mapreduce.JobContext context)
Gets the password from the configuration. |
protected static String |
getTokenClass(org.apache.hadoop.mapreduce.JobContext context)
Gets the serialized token class name from the configuration. |
protected static Boolean |
isConnectorInfoSet(org.apache.hadoop.mapreduce.JobContext context)
Determines if the connector has been configured. |
static void |
setBatchWriterOptions(org.apache.hadoop.mapreduce.Job job,
BatchWriterConfig bwConfig)
Sets the configuration for the job's BatchWriter instances. |
static void |
setConnectorInfo(org.apache.hadoop.mapreduce.Job job,
String principal,
AuthenticationToken token)
Sets the connector information needed to communicate with Accumulo in this job. |
static void |
setCreateTables(org.apache.hadoop.mapreduce.Job job,
boolean enableFeature)
Sets the directive to create new tables, as necessary. |
static void |
setDefaultTableName(org.apache.hadoop.mapreduce.Job job,
String tableName)
Sets the default table name to use if one emits a null in place of a table name for a given mutation. |
static void |
setLogLevel(org.apache.hadoop.conf.Configuration conf,
org.apache.log4j.Level level)
Deprecated. since 1.5.0; Use setLogLevel(Job, Level) instead. |
static void |
setLogLevel(org.apache.hadoop.mapreduce.Job job,
org.apache.log4j.Level level)
Sets the log level for this job. |
static void |
setMaxLatency(org.apache.hadoop.conf.Configuration conf,
int numberOfMilliseconds)
Deprecated. since 1.5.0; Use setBatchWriterOptions(Job, BatchWriterConfig) instead. |
static void |
setMaxMutationBufferSize(org.apache.hadoop.conf.Configuration conf,
long numberOfBytes)
Deprecated. since 1.5.0; Use setBatchWriterOptions(Job, BatchWriterConfig) instead. |
static void |
setMaxWriteThreads(org.apache.hadoop.conf.Configuration conf,
int numberOfThreads)
Deprecated. since 1.5.0; Use setBatchWriterOptions(Job, BatchWriterConfig) instead. |
static void |
setMockInstance(org.apache.hadoop.conf.Configuration conf,
String instanceName)
Deprecated. since 1.5.0; Use setMockInstance(Job, String) instead. |
static void |
setMockInstance(org.apache.hadoop.mapreduce.Job job,
String instanceName)
Configures a MockInstance for this job. |
static void |
setOutputInfo(org.apache.hadoop.conf.Configuration conf,
String user,
byte[] passwd,
boolean createTables,
String defaultTable)
Deprecated. since 1.5.0; Use setConnectorInfo(Job, String, AuthenticationToken), setCreateTables(Job, boolean), and
setDefaultTableName(Job, String) instead. |
static void |
setSimulationMode(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use setSimulationMode(Job, boolean) instead. |
static void |
setSimulationMode(org.apache.hadoop.mapreduce.Job job,
boolean enableFeature)
Sets the directive to use simulation mode for this job. |
static void |
setZooKeeperInstance(org.apache.hadoop.conf.Configuration conf,
String instanceName,
String zooKeepers)
Deprecated. since 1.5.0; Use setZooKeeperInstance(Job, String, String) instead. |
static void |
setZooKeeperInstance(org.apache.hadoop.mapreduce.Job job,
String instanceName,
String zooKeepers)
Configures a ZooKeeperInstance for this job. |
| Methods inherited from class java.lang.Object |
|---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
| Field Detail |
|---|
protected static final org.apache.log4j.Logger log
| Constructor Detail |
|---|
public AccumuloOutputFormat()
| Method Detail |
|---|
public static void setConnectorInfo(org.apache.hadoop.mapreduce.Job job,
String principal,
AuthenticationToken token)
throws AccumuloSecurityException
WARNING: The serialized token is stored in the configuration and shared with all MapReduce tasks. It is BASE64 encoded to provide a charset safe conversion to a string, and is not intended to be secure.
job - the Hadoop job instance to be configured
principal - a valid Accumulo user name (user must have Table.CREATE permission if setCreateTables(Job, boolean) is set to true)
token - the user's password
AccumuloSecurityException
protected static Boolean isConnectorInfoSet(org.apache.hadoop.mapreduce.JobContext context)
context - the Hadoop context for the configured job
setConnectorInfo(Job, String, AuthenticationToken)
protected static String getPrincipal(org.apache.hadoop.mapreduce.JobContext context)
context - the Hadoop context for the configured job
setConnectorInfo(Job, String, AuthenticationToken)
protected static String getTokenClass(org.apache.hadoop.mapreduce.JobContext context)
context - the Hadoop context for the configured job
setConnectorInfo(Job, String, AuthenticationToken)
protected static byte[] getToken(org.apache.hadoop.mapreduce.JobContext context)
context - the Hadoop context for the configured job
setConnectorInfo(Job, String, AuthenticationToken)
public static void setZooKeeperInstance(org.apache.hadoop.mapreduce.Job job,
String instanceName,
String zooKeepers)
Configures a ZooKeeperInstance for this job.
job - the Hadoop job instance to be configured
instanceName - the Accumulo instance name
zooKeepers - a comma-separated list of zookeeper servers
public static void setMockInstance(org.apache.hadoop.mapreduce.Job job,
String instanceName)
Configures a MockInstance for this job.
job - the Hadoop job instance to be configured
instanceName - the Accumulo instance name
protected static Instance getInstance(org.apache.hadoop.mapreduce.JobContext context)
Initializes an Accumulo Instance based on the configuration.
context - the Hadoop context for the configured job
setZooKeeperInstance(Job, String, String),
setMockInstance(Job, String)
public static void setLogLevel(org.apache.hadoop.mapreduce.Job job,
org.apache.log4j.Level level)
job - the Hadoop job instance to be configured
level - the logging level
protected static org.apache.log4j.Level getLogLevel(org.apache.hadoop.mapreduce.JobContext context)
context - the Hadoop context for the configured job
setLogLevel(Job, Level)
public static void setDefaultTableName(org.apache.hadoop.mapreduce.Job job,
String tableName)
job - the Hadoop job instance to be configured
tableName - the table to use when the tablename is null in the write call
protected static String getDefaultTableName(org.apache.hadoop.mapreduce.JobContext context)
context - the Hadoop context for the configured job
setDefaultTableName(Job, String)
public static void setBatchWriterOptions(org.apache.hadoop.mapreduce.Job job,
BatchWriterConfig bwConfig)
Sets the configuration for the job's BatchWriter instances. If not set, a new BatchWriterConfig, with sensible built-in defaults, is
used. Setting the configuration multiple times overwrites any previous configuration.
job - the Hadoop job instance to be configured
bwConfig - the configuration for the BatchWriter
protected static BatchWriterConfig getBatchWriterOptions(org.apache.hadoop.mapreduce.JobContext context)
Gets the BatchWriterConfig settings.
context - the Hadoop context for the configured job
setBatchWriterOptions(Job, BatchWriterConfig)
public static void setCreateTables(org.apache.hadoop.mapreduce.Job job,
boolean enableFeature)
By default, this feature is disabled.
job - the Hadoop job instance to be configured
enableFeature - the feature is enabled if true, disabled otherwise
protected static Boolean canCreateTables(org.apache.hadoop.mapreduce.JobContext context)
context - the Hadoop context for the configured job
setCreateTables(Job, boolean)
public static void setSimulationMode(org.apache.hadoop.mapreduce.Job job,
boolean enableFeature)
By default, this feature is disabled.
job - the Hadoop job instance to be configured
enableFeature - the feature is enabled if true, disabled otherwise
protected static Boolean getSimulationMode(org.apache.hadoop.mapreduce.JobContext context)
context - the Hadoop context for the configured job
setSimulationMode(Job, boolean)
public void checkOutputSpecs(org.apache.hadoop.mapreduce.JobContext job)
throws IOException
checkOutputSpecs in class org.apache.hadoop.mapreduce.OutputFormat<org.apache.hadoop.io.Text,Mutation>
IOException
public org.apache.hadoop.mapreduce.OutputCommitter getOutputCommitter(org.apache.hadoop.mapreduce.TaskAttemptContext context)
getOutputCommitter in class org.apache.hadoop.mapreduce.OutputFormat<org.apache.hadoop.io.Text,Mutation>
public org.apache.hadoop.mapreduce.RecordWriter<org.apache.hadoop.io.Text,Mutation> getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext attempt)
throws IOException
getRecordWriter in class org.apache.hadoop.mapreduce.OutputFormat<org.apache.hadoop.io.Text,Mutation>
IOException
@Deprecated
public static void setOutputInfo(org.apache.hadoop.conf.Configuration conf,
String user,
byte[] passwd,
boolean createTables,
String defaultTable)
Deprecated. since 1.5.0; Use setConnectorInfo(Job, String, AuthenticationToken), setCreateTables(Job, boolean), and
setDefaultTableName(Job, String) instead.
@Deprecated
public static void setZooKeeperInstance(org.apache.hadoop.conf.Configuration conf,
String instanceName,
String zooKeepers)
Deprecated. since 1.5.0; Use setZooKeeperInstance(Job, String, String) instead.
@Deprecated
public static void setMockInstance(org.apache.hadoop.conf.Configuration conf,
String instanceName)
Deprecated. since 1.5.0; Use setMockInstance(Job, String) instead.
@Deprecated
public static void setMaxMutationBufferSize(org.apache.hadoop.conf.Configuration conf,
long numberOfBytes)
Deprecated. since 1.5.0; Use setBatchWriterOptions(Job, BatchWriterConfig) instead.
@Deprecated
public static void setMaxLatency(org.apache.hadoop.conf.Configuration conf,
int numberOfMilliseconds)
Deprecated. since 1.5.0; Use setBatchWriterOptions(Job, BatchWriterConfig) instead.
@Deprecated
public static void setMaxWriteThreads(org.apache.hadoop.conf.Configuration conf,
int numberOfThreads)
Deprecated. since 1.5.0; Use setBatchWriterOptions(Job, BatchWriterConfig) instead.
@Deprecated
public static void setLogLevel(org.apache.hadoop.conf.Configuration conf,
org.apache.log4j.Level level)
Deprecated. since 1.5.0; Use setLogLevel(Job, Level) instead.
@Deprecated public static void setSimulationMode(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use setSimulationMode(Job, boolean) instead.
@Deprecated protected static String getPrincipal(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use getToken(JobContext) instead.
@Deprecated protected static byte[] getToken(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use getToken(JobContext) instead.
@Deprecated protected static boolean canCreateTables(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use canCreateTables(JobContext) instead.
@Deprecated protected static String getDefaultTableName(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use getDefaultTableName(JobContext) instead.
@Deprecated protected static Instance getInstance(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use getInstance(JobContext) instead.
@Deprecated protected static long getMaxMutationBufferSize(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use getBatchWriterOptions(JobContext) instead.
@Deprecated protected static int getMaxLatency(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use getBatchWriterOptions(JobContext) instead.
@Deprecated protected static int getMaxWriteThreads(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use getBatchWriterOptions(JobContext) instead.
@Deprecated protected static org.apache.log4j.Level getLogLevel(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use getLogLevel(JobContext) instead.
@Deprecated protected static boolean getSimulationMode(org.apache.hadoop.conf.Configuration conf)
Deprecated. since 1.5.0; Use getSimulationMode(JobContext) instead.
|
||||||||||
| PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
| SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD | |||||||||