public class InputConfigurator extends ConfiguratorBase
Modifier and Type | Class and Description |
---|---|
static class |
InputConfigurator.Features
Configuration keys for various features.
|
static class |
InputConfigurator.ScanOpts
Configuration keys for
Scanner. |
ConfiguratorBase.ConnectorInfo, ConfiguratorBase.GeneralOpts, ConfiguratorBase.InstanceOpts, ConfiguratorBase.TokenSource
Constructor and Description |
---|
InputConfigurator() |
Modifier and Type | Method and Description |
---|---|
static void |
addIterator(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf,
IteratorSetting cfg)
Encode an iterator on the input for the single input table associated with this job.
|
static Map<String,Map<KeyExtent,List<Range>>> |
binOffline(String tableId,
List<Range> ranges,
Instance instance,
Connector conn) |
static Set<Pair<org.apache.hadoop.io.Text,org.apache.hadoop.io.Text>> |
deserializeFetchedColumns(Collection<String> serialized) |
static void |
fetchColumns(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf,
Collection<Pair<org.apache.hadoop.io.Text,org.apache.hadoop.io.Text>> columnFamilyColumnQualifierPairs)
Restricts the columns that will be mapped over for the single input table on this job.
|
static Boolean |
getAutoAdjustRanges(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf)
Determines whether a configuration has auto-adjust ranges enabled.
|
static String |
getClassLoaderContext(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf)
Gets the name of the context classloader to use for scans
|
protected static Map.Entry<String,InputTableConfig> |
getDefaultInputTableConfig(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf)
Returns the
InputTableConfig for the configuration based on the properties set using the single-table
input methods. |
static Set<Pair<org.apache.hadoop.io.Text,org.apache.hadoop.io.Text>> |
getFetchedColumns(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf)
Gets the columns to be mapped over from this job.
|
static InputTableConfig |
getInputTableConfig(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf,
String tableName)
Returns the
InputTableConfig for the given table |
static Map<String,InputTableConfig> |
getInputTableConfigs(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf)
Returns all
InputTableConfig objects associated with this job. |
static String |
getInputTableName(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf)
Gets the name of the input table, over which this job will scan.
|
static List<IteratorSetting> |
getIterators(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf)
Gets a list of the iterator settings (for iterators to apply to a scanner) from this configuration.
|
static List<Range> |
getRanges(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf)
Gets the ranges to scan over from a job.
|
static SamplerConfiguration |
getSamplerConfiguration(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf) |
static Authorizations |
getScanAuthorizations(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf)
Gets the authorizations to set for the scans from the configuration.
|
static TabletLocator |
getTabletLocator(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf,
String tableId)
Initializes an Accumulo
TabletLocator based on the configuration. |
static Boolean |
isBatchScan(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf)
Determines whether a configuration has the BatchScanner feature enabled.
|
static Boolean |
isIsolated(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf)
Determines whether a configuration has isolation enabled.
|
static Boolean |
isOfflineScan(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf)
Determines whether a configuration has the offline table scan feature enabled.
|
static String[] |
serializeColumns(Collection<Pair<org.apache.hadoop.io.Text,org.apache.hadoop.io.Text>> columnFamilyColumnQualifierPairs) |
static void |
setAutoAdjustRanges(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf,
boolean enableFeature)
Controls the automatic adjustment of ranges for this job.
|
static void |
setBatchScan(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf,
boolean enableFeature)
Controls the use of the
BatchScanner in this job. |
static void |
setClassLoaderContext(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf,
String context)
Sets the name of the context classloader to use for scans
|
static void |
setInputTableConfigs(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf,
Map<String,InputTableConfig> configs)
Sets configurations for multiple tables at a time.
|
static void |
setInputTableName(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf,
String tableName)
Sets the name of the input table, over which this job will scan.
|
static void |
setLocalIterators(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf,
boolean enableFeature)
Controls the use of the
ClientSideIteratorScanner in this job. |
static void |
setOfflineTableScan(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf,
boolean enableFeature)
Enable reading offline tables.
|
static void |
setRanges(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf,
Collection<Range> ranges)
Sets the input ranges to scan on all input tables for this job.
|
static void |
setSamplerConfiguration(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf,
SamplerConfiguration samplerConfig) |
static void |
setScanAuthorizations(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf,
Authorizations auths)
Sets the
Authorizations used to scan. |
static void |
setScanIsolation(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf,
boolean enableFeature)
Controls the use of the
IsolatedScanner in this job. |
static Boolean |
usesLocalIterators(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf)
Determines whether a configuration uses local iterators.
|
static Instance |
validateInstance(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf)
Validates and extracts an
Instance from the configuration |
static void |
validateOptions(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf)
Deprecated.
|
static void |
validatePermissions(Class<?> implementingClass,
org.apache.hadoop.conf.Configuration conf,
Connector conn)
Validates that the user has permissions on the requested tables
|
enumToConfKey, enumToConfKey, getAuthenticationToken, getClientConfiguration, getInstance, getLogLevel, getPrincipal, getTokenFromFile, getVisibilityCacheSize, isConnectorInfoSet, setConnectorInfo, setConnectorInfo, setLogLevel, setMockInstance, setVisibilityCacheSize, setZooKeeperInstance, unwrapAuthenticationToken, unwrapAuthenticationToken
public static void setClassLoaderContext(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf, String context)
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configurecontext
- the name of the context classloaderpublic static String getClassLoaderContext(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf)
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configurepublic static void setInputTableName(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf, String tableName)
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configuretableName
- the name of the input table over which this job will scan
public static String getInputTableName(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf)
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configurepublic static void setScanAuthorizations(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf, Authorizations auths)
Authorizations
used to scan. Must be a subset of the user's authorization. Defaults to the empty set.implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configureauths
- the user's authorizationspublic static Authorizations getScanAuthorizations(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf)
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configuresetScanAuthorizations(Class, Configuration, Authorizations)
public static void setRanges(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf, Collection<Range> ranges)
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configureranges
- the ranges that will be mapped overIllegalArgumentException
- if the ranges cannot be encoded into base 64public static List<Range> getRanges(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf) throws IOException
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configureIOException
- if the ranges have been encoded improperlysetRanges(Class, Configuration, Collection)
public static List<IteratorSetting> getIterators(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf)
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configureaddIterator(Class, Configuration, IteratorSetting)
public static void fetchColumns(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf, Collection<Pair<org.apache.hadoop.io.Text,org.apache.hadoop.io.Text>> columnFamilyColumnQualifierPairs)
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configurecolumnFamilyColumnQualifierPairs
- a pair of Text
objects corresponding to column family and column qualifier. If the column qualifier is null, the entire column family is
selected. An empty set is the default and is equivalent to scanning all columns.
IllegalArgumentException
- if the column family is nullpublic static String[] serializeColumns(Collection<Pair<org.apache.hadoop.io.Text,org.apache.hadoop.io.Text>> columnFamilyColumnQualifierPairs)
public static Set<Pair<org.apache.hadoop.io.Text,org.apache.hadoop.io.Text>> getFetchedColumns(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf)
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configurefetchColumns(Class, Configuration, Collection)
public static Set<Pair<org.apache.hadoop.io.Text,org.apache.hadoop.io.Text>> deserializeFetchedColumns(Collection<String> serialized)
public static void addIterator(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf, IteratorSetting cfg)
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configurecfg
- the configuration of the iteratorIllegalArgumentException
- if the iterator can't be serialized into the configurationpublic static void setAutoAdjustRanges(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf, boolean enableFeature)
By default, this feature is enabled.
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configureenableFeature
- the feature is enabled if true, disabled otherwisesetRanges(Class, Configuration, Collection)
public static Boolean getAutoAdjustRanges(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf)
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configuresetAutoAdjustRanges(Class, Configuration, boolean)
public static void setScanIsolation(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf, boolean enableFeature)
IsolatedScanner
in this job.
By default, this feature is disabled.
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configureenableFeature
- the feature is enabled if true, disabled otherwisepublic static Boolean isIsolated(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf)
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configuresetScanIsolation(Class, Configuration, boolean)
public static void setLocalIterators(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf, boolean enableFeature)
ClientSideIteratorScanner
in this job. Enabling this feature will cause the iterator stack to be constructed within the Map
task, rather than within the Accumulo TServer. To use this feature, all classes needed for those iterators must be available on the classpath for the task.
By default, this feature is disabled.
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configureenableFeature
- the feature is enabled if true, disabled otherwisepublic static Boolean usesLocalIterators(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf)
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configuresetLocalIterators(Class, Configuration, boolean)
public static void setOfflineTableScan(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf, boolean enableFeature)
To use this option, the map reduce user will need access to read the Accumulo directory in HDFS.
Reading the offline table will create the scan time iterator stack in the map process. So any iterators that are configured for the table will need to be on the mapper's classpath.
One way to use this feature is to clone a table, take the clone offline, and use the clone as the input table for a map reduce job. If you plan to map reduce over the data many times, it may be better to compact the table, clone it, take it offline, and use the clone for all map reduce jobs. The reason to do this is that compaction will reduce each tablet in the table to one file, and it is faster to read from one file.
There are two possible advantages to reading a table's files directly out of HDFS. First, you may see better read performance. Second, it will support speculative execution better. When reading an online table, speculative execution can put more load on an already slow tablet server.
By default, this feature is disabled.
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configureenableFeature
- the feature is enabled if true, disabled otherwisepublic static Boolean isOfflineScan(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf)
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configuresetOfflineTableScan(Class, Configuration, boolean)
public static void setBatchScan(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf, boolean enableFeature)
BatchScanner
in this job. Using this feature will group ranges by their source tablet per InputSplit and use BatchScanner
to read them.
By default, this feature is disabled.
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configureenableFeature
- the feature is enabled if true, disabled otherwisepublic static Boolean isBatchScan(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf)
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configuresetBatchScan(Class, Configuration, boolean)
public static void setInputTableConfigs(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf, Map<String,InputTableConfig> configs)
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configureconfigs
- a map from table name to InputTableConfig
objects to associate with the job
public static Map<String,InputTableConfig> getInputTableConfigs(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf)
InputTableConfig
objects associated with this job.implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configurepublic static InputTableConfig getInputTableConfig(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf, String tableName)
InputTableConfig
for the given tableimplementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configuretableName
- the table name for which to fetch the table query configpublic static TabletLocator getTabletLocator(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf, String tableId) throws TableNotFoundException
TabletLocator
based on the configuration.implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configuretableId
- The table id for which to initialize the TabletLocator
TableNotFoundException
- if the table name set on the configuration doesn't existpublic static Instance validateInstance(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf) throws IOException
Instance
from the configurationimplementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configureIOException
public static void validatePermissions(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf, Connector conn) throws IOException
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configureconn
- the ConnectorIOException
@Deprecated public static void validateOptions(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf) throws IOException
InputFormat
.
The implementation (JobContext or JobConf which created the Configuration) needs to be used to extract the proper AuthenticationToken
for
DelegationTokenImpl
support.
implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop configuration object to configureIOException
- if the context is improperly configuredvalidateInstance(Class, Configuration)
,
validatePermissions(Class, Configuration, Connector)
protected static Map.Entry<String,InputTableConfig> getDefaultInputTableConfig(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf)
InputTableConfig
for the configuration based on the properties set using the single-table
input methods.implementingClass
- the class whose name will be used as a prefix for the property configuration keyconf
- the Hadoop instance for which to retrieve the configurationpublic static Map<String,Map<KeyExtent,List<Range>>> binOffline(String tableId, List<Range> ranges, Instance instance, Connector conn) throws AccumuloException, TableNotFoundException
public static void setSamplerConfiguration(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf, SamplerConfiguration samplerConfig)
public static SamplerConfiguration getSamplerConfiguration(Class<?> implementingClass, org.apache.hadoop.conf.Configuration conf)
Copyright © 2011–2016 The Apache Software Foundation. All rights reserved.