hive

Type Members

trait AlignmentStrategy extends AnyRef

An alignment strategy will accept an input Row and return an output Row that is compatible with the target schema.
An alignment strategy will accept an input Row and return an output Row that is compatible with the target schema. This allows writing to sinks where the output schema is not the same as the input schema.
For example, the input may come from a JDBC table, and an output Hive table only defines a subset of the columns. Each row would need to be aligned so that it matches the subset schema.
Implementations are free to add values, drop values or throw an exception if they wish.
trait CommitCallback extends AnyRef
case class Compactor(dbname: String, tablename: String)(implicit fs: FileSystem, conf: Configuration, client: IMetaStoreClient) extends Logging with Product with Serializable
trait FileListener extends AnyRef
trait FilenameStrategy extends AnyRef

Strategy responsible for the filenames created by eel when writing out data.
class HiveContext extends AnyRef
case class HiveDatabase(dbName: String)(implicit fs: FileSystem, client: IMetaStoreClient) extends Product with Serializable
case class HiveDatasetUri(db: String, table: String) extends Product with Serializable
trait HiveDialect extends Logging
class HiveFilePublisher extends Publisher[Seq[Row]] with Using
class HiveOps extends Logging
trait HiveOutputStream extends AnyRef
class HivePartitionExtractor extends AnyRef
class HivePartitionPublisher extends Publisher[Seq[Row]] with Logging

A Hive Part that can read values from the metastore, rather than reading values from files.
A Hive Part that can read values from the metastore, rather than reading values from files. This can be used only when the requested fields are all partition keys.
class HivePartitionScanner extends Logging
case class HiveSink(dbName: String, tableName: String, permission: Option[FsPermission] = None, inheritPermissions: Option[Boolean] = None, principal: Option[String] = None, partitionFields: Seq[String] = Nil, partitionStrategy: PartitionStrategy = new DynamicPartitionStrategy, filenameStrategy: FilenameStrategy = DefaultFilenameStrategy, stagingStrategy: StagingStrategy = DefaultStagingStrategy, metastoreSchemaHandler: MetastoreSchemaHandler = ..., alignStrategy: AlignmentStrategy = RowPaddingAlignmentStrategy, outputSchemaStrategy: OutputSchemaStrategy = SkipPartitionsOutputSchemaStrategy, keytabPath: Option[Path] = None, fileListener: FileListener = FileListener.noop, createTable: Boolean = false, dialect: Option[HiveDialect] = None, callbacks: Seq[CommitCallback] = Nil, roundingMode: RoundingMode = RoundingMode.UNNECESSARY, metadata: Map[String, String] = Map.empty)(implicit fs: FileSystem, client: IMetaStoreClient) extends Sink with Logging with Product with Serializable
class HiveSinkWriter extends SinkWriter with Logging
case class HiveSource(dbName: String, tableName: String, projection: List[String] = Nil, predicate: Option[Predicate] = None, partitionConstraints: Seq[PartitionConstraint] = Nil, principal: Option[String] = None, keytabPath: Option[Path] = None)(implicit fs: FileSystem, client: IMetaStoreClient) extends Source with Logging with Using with Product with Serializable

projection
sets which fields are required by the caller.
predicate
optional predicate which will filter rows at the read level
partitionConstraints
optional constraints on the partition data to narrow which partitions are read
trait HiveStats extends AnyRef
case class HiveTable(dbName: String, tableName: String)(implicit fs: FileSystem, conf: Configuration, client: IMetaStoreClient) extends Logging with Product with Serializable
trait MetastoreSchemaHandler extends AnyRef

A handler that is invoked with the schema of the source and the existing schema in the metastore.
A handler that is invoked with the schema of the source and the existing schema in the metastore.
This allows a handler to decide how to handle differences. For instance, an implementation may choose to evolve the metastore schema to add missing fields. Another implementation may throw an exception if the schemas are not aligned.
trait OutputSchemaStrategy extends AnyRef

Accepts a metastore schema and returns the schema that should actually be persisted to disk.
Accepts a metastore schema and returns the schema that should actually be persisted to disk. This allows us to determine if some data is not written, for example in parquet files it is common to skip writing out partition data, since that data is present in the metastore.
class ParquetHiveStats extends HiveStats with Logging
case class PartitionColumn(name: String, dataType: DataType = StringType) extends Product with Serializable
trait RowAligner extends AnyRef
trait StagingStrategy extends AnyRef
trait StagingStrategy2 extends AnyRef
case class TableSpec(tableName: String, tableType: TableType, location: String, cols: Seq[FieldSchema], numBuckets: Int, bucketNames: List[String], params: Map[String, String], inputFormat: String, outputFormat: String, serde: String, retention: Int, createTime: Long, lastAccessTime: Long, owner: String) extends Product with Serializable

Value Members

object DefaultFilenameStrategy extends FilenameStrategy
object DefaultStagingStrategy extends StagingStrategy
object EvolutionMetastoreSchemaHandler extends MetastoreSchemaHandler with Logging

An implementation of MetastoreSchemaHandler that will evolve the metastore schema where possible to match the incoming data.
An implementation of MetastoreSchemaHandler that will evolve the metastore schema where possible to match the incoming data.
It will do this by adding missing fields to the end of the current schema. The new fields cannot be added as partition fields as the table will already have been created.
object FileListener
object HiveDDL
object HiveDatasetUri extends Serializable
object HiveDialect extends Logging
object HiveFileScanner extends Logging
object HiveSchemaFns extends Logging
object HiveSink extends Serializable
object HiveTableFilesFn extends Logging

Locates files for a given table.
Locates files for a given table.
Connects to the hive metastore to get the partitions list (or if no partitions then just root) and scans those directories.
Returns a Map of each partition to the files in that partition.
If partition constraints are specified then partitions that do not satisfy those constraints are filtered out.
If there are no partitions then the Map will contain a single key, of Partition.empty which acts as the root.
object NoopMetastoreSchemaHandler extends MetastoreSchemaHandler

An implementation of MetastoreSchemaHandler that does nothing. This may result in errors downstream if, for example, the input schema does not include all columns and defaults cannot be applied.
object RequireCompatibilityMetastoreSchemaHandler extends MetastoreSchemaHandler

An implementation of MetastoreSchemaHandler that requires the input schema to be compatible with the metastore schema.
An implementation of MetastoreSchemaHandler that requires the input schema to be compatible with the metastore schema. Compatibility is achieved when all fields in the input schema are already defined in the metastore, with compatible types.
With this handler, the input schema is allowed to have extra fields which are not present in the metastore. It is assumed they will be dropped by the alignment strategy.
If the schemas are not compatible then an exception is raised.
object RowPaddingAlignmentStrategy extends AlignmentStrategy

An AlignmentStrategy that will use default values, or nulls, to pad out rows to match the target schema, dropping any fields that exist in the input, but not the output, schema
object SkipPartitionsOutputSchemaStrategy extends OutputSchemaStrategy

This strategy will drop partition columns from the schema so that they are not written out to the files.
object StrictMetastoreSchemaHandler extends MetastoreSchemaHandler

An implementation of MetastoreSchemaHandler that requires the input schema to be equal to the metastore schema.
An implementation of MetastoreSchemaHandler that requires the input schema to be equal to the metastore schema. Equality is defined as having the same field names with the same types (order is irrelevant).
Any missing fields or additional fields not present will cause an exception to be raised.
If the schemas are not equal then an exception is raised.
package dialect
package partition

package hive

Type Members

trait AlignmentStrategy extends AnyRef

trait CommitCallback extends AnyRef

case class Compactor(dbname: String, tablename: String)(implicit fs: FileSystem, conf: Configuration, client: IMetaStoreClient) extends Logging with Product with Serializable

trait FileListener extends AnyRef

trait FilenameStrategy extends AnyRef

class HiveContext extends AnyRef

case class HiveDatabase(dbName: String)(implicit fs: FileSystem, client: IMetaStoreClient) extends Product with Serializable

case class HiveDatasetUri(db: String, table: String) extends Product with Serializable

trait HiveDialect extends Logging

class HiveFilePublisher extends Publisher[Seq[Row]] with Using

class HiveOps extends Logging

trait HiveOutputStream extends AnyRef

class HivePartitionExtractor extends AnyRef

class HivePartitionPublisher extends Publisher[Seq[Row]] with Logging

class HivePartitionScanner extends Logging

class HiveSinkWriter extends SinkWriter with Logging

trait HiveStats extends AnyRef

case class HiveTable(dbName: String, tableName: String)(implicit fs: FileSystem, conf: Configuration, client: IMetaStoreClient) extends Logging with Product with Serializable

trait MetastoreSchemaHandler extends AnyRef

trait OutputSchemaStrategy extends AnyRef

class ParquetHiveStats extends HiveStats with Logging

case class PartitionColumn(name: String, dataType: DataType = StringType) extends Product with Serializable

trait RowAligner extends AnyRef

trait StagingStrategy extends AnyRef

trait StagingStrategy2 extends AnyRef

Value Members

object DefaultFilenameStrategy extends FilenameStrategy

object DefaultStagingStrategy extends StagingStrategy

object EvolutionMetastoreSchemaHandler extends MetastoreSchemaHandler with Logging

object FileListener

object HiveDDL

object HiveDatasetUri extends Serializable

object HiveDialect extends Logging

object HiveFileScanner extends Logging

object HiveSchemaFns extends Logging

object HiveSink extends Serializable

object HiveTableFilesFn extends Logging

object NoopMetastoreSchemaHandler extends MetastoreSchemaHandler

object RequireCompatibilityMetastoreSchemaHandler extends MetastoreSchemaHandler

object RowPaddingAlignmentStrategy extends AlignmentStrategy

object SkipPartitionsOutputSchemaStrategy extends OutputSchemaStrategy

object StrictMetastoreSchemaHandler extends MetastoreSchemaHandler

package dialect

package partition

Ungrouped