catalog

Type Members

case class BucketSpec(numBuckets: Int, bucketColumnNames: Seq[String], sortColumnNames: Seq[String]) extends Product with Serializable

A container for bucketing information.
A container for bucketing information. Bucketing is a technology for decomposing data sets into more manageable parts, and the number of buckets is fixed so it does not fluctuate with data.
numBuckets
number of buckets.
bucketColumnNames
the names of the columns that are used to generate the bucket id.
sortColumnNames
the names of the columns that are used to sort data in each bucket.
case class CatalogDatabase(name: String, description: String, locationUri: URI, properties: Map[String, String]) extends Product with Serializable

A database defined in the catalog.
case class CatalogFunction(identifier: FunctionIdentifier, className: String, resources: Seq[FunctionResource]) extends Product with Serializable

A function defined in the catalog.
A function defined in the catalog.
identifier
name of the function
className
fully qualified class name, e.g. "org.apache.spark.util.MyFunc"
resources
resource types and URIs used by the function
case class CatalogStatistics(sizeInBytes: BigInt, rowCount: Option[BigInt] = None, colStats: Map[String, ColumnStat] = Map.empty) extends Product with Serializable

This class of statistics is used in CatalogTable to interact with the metastore.
This class of statistics is used in CatalogTable to interact with the metastore. We define this new class instead of directly using Statistics here because there is no concept of attributes or broadcast hints in the catalog.
case class CatalogStorageFormat(locationUri: Option[URI], inputFormat: Option[String], outputFormat: Option[String], serde: Option[String], compressed: Boolean, properties: Map[String, String]) extends Product with Serializable

Storage format, used to describe how a partition or a table is stored.
case class CatalogTable(identifier: TableIdentifier, tableType: CatalogTableType, storage: CatalogStorageFormat, schema: StructType, provider: Option[String] = None, partitionColumnNames: Seq[String] = Seq.empty, bucketSpec: Option[BucketSpec] = None, owner: String = "", createTime: Long = System.currentTimeMillis, lastAccessTime: Long = 1, properties: Map[String, String] = Map.empty, stats: Option[CatalogStatistics] = None, viewText: Option[String] = None, comment: Option[String] = None, unsupportedFeatures: Seq[String] = Seq.empty, tracksPartitionsInCatalog: Boolean = false, schemaPreservesCase: Boolean = true) extends Product with Serializable

A table defined in the catalog.
A table defined in the catalog.
Note that Hive's metastore also tracks skewed columns. We should consider adding that in the future once we have a better understanding of how we want to handle skewed columns.
provider
the name of the data source provider for this table, e.g. parquet, json, etc. Can be None if this table is a view; should be "hive" for Hive serde tables.
unsupportedFeatures
a list of string descriptions of features that are used by the underlying table but not yet supported by Spark SQL.
tracksPartitionsInCatalog
whether this table's partition metadata is stored in the catalog. If false, it is inferred automatically based on file structure.
schemaPreservesCase
Whether or not the schema resolved for this table is case-sensitive. When using a Hive Metastore, this flag is set to false if a case-sensitive schema could not be read from the table properties. Used to trigger case-sensitive schema inference at query time, when configured.
case class CatalogTablePartition(spec: TablePartitionSpec, storage: CatalogStorageFormat, parameters: Map[String, String] = Map.empty) extends Product with Serializable

A partition (Hive style) defined in the catalog.
A partition (Hive style) defined in the catalog.
spec
partition spec values indexed by column name
storage
storage format of the partition
parameters
some parameters for the partition, for example, stats.
case class CatalogTableType extends Product with Serializable
case class CreateDatabaseEvent(database: String) extends DatabaseEvent with Product with Serializable

Event fired after a database has been created.
case class CreateDatabasePreEvent(database: String) extends DatabaseEvent with Product with Serializable

Event fired before a database is created.
case class CreateFunctionEvent(database: String, name: String) extends FunctionEvent with Product with Serializable

Event fired after a function has been created.
case class CreateFunctionPreEvent(database: String, name: String) extends FunctionEvent with Product with Serializable

Event fired before a function is created.
case class CreateTableEvent(database: String, name: String) extends TableEvent with Product with Serializable

Event fired after a table has been created.
case class CreateTablePreEvent(database: String, name: String) extends TableEvent with Product with Serializable

Event fired before a table is created.
trait DatabaseEvent extends ExternalCatalogEvent

Event fired when a database is created or dropped.
case class DropDatabaseEvent(database: String) extends DatabaseEvent with Product with Serializable

Event fired after a database has been dropped.
case class DropDatabasePreEvent(database: String) extends DatabaseEvent with Product with Serializable

Event fired before a database is dropped.
case class DropFunctionEvent(database: String, name: String) extends FunctionEvent with Product with Serializable

Event fired after a function has been dropped.
case class DropFunctionPreEvent(database: String, name: String) extends FunctionEvent with Product with Serializable

Event fired before a function is dropped.
case class DropTableEvent(database: String, name: String) extends TableEvent with Product with Serializable

Event fired after a table has been dropped.
case class DropTablePreEvent(database: String, name: String) extends TableEvent with Product with Serializable

Event fired before a table is dropped.
abstract class ExternalCatalog extends ListenerBus[ExternalCatalogEventListener, ExternalCatalogEvent]

Interface for the system catalog (of functions, partitions, tables, and databases).
Interface for the system catalog (of functions, partitions, tables, and databases).
This is only used for non-temporary items, and implementations must be thread-safe as they can be accessed in multiple threads. This is an external catalog because it is expected to interact with external systems.
Implementations should throw NoSuchDatabaseException when databases don't exist.
trait ExternalCatalogEvent extends SparkListenerEvent

Event emitted by the external catalog when it is modified.
Event emitted by the external catalog when it is modified. Events are either fired before or after the modification (the event should document this).
trait ExternalCatalogEventListener extends AnyRef

Listener interface for external catalog modification events.
trait FunctionEvent extends DatabaseEvent

Event fired when a function is created, dropped or renamed.
case class FunctionResource(resourceType: FunctionResourceType, uri: String) extends Product with Serializable
trait FunctionResourceLoader extends AnyRef

A simple trait representing a class that can be used to load resources used by a function.
A simple trait representing a class that can be used to load resources used by a function. Because only a SQLContext can load resources, we create this trait to avoid explicitly passing SQLContext around.
abstract class FunctionResourceType extends AnyRef

A trait that represents the type of a resource needed by a function.
class GlobalTempViewManager extends AnyRef

A thread-safe manager for global temporary views, providing atomic operations to manage them, e.g. create, update, remove, etc.
A thread-safe manager for global temporary views, providing atomic operations to manage them, e.g. create, update, remove, etc.
Note that the view name is always case-sensitive here; callers are responsible for formatting the view name according to the case-sensitivity config.
case class HiveTableRelation(tableMeta: CatalogTable, dataCols: Seq[AttributeReference], partitionCols: Seq[AttributeReference]) extends LeafNode with MultiInstanceRelation with Product with Serializable

A LogicalPlan that represents a Hive table.
A LogicalPlan that represents a Hive table.
TODO: remove this after we completely make Hive a data source.
class InMemoryCatalog extends ExternalCatalog

An in-memory (ephemeral) implementation of the system catalog.
An in-memory (ephemeral) implementation of the system catalog.
This is a dummy implementation that does not require setting up external systems. It is intended for testing or exploration purposes only and should not be used in production.
All public methods should be synchronized for thread-safety.
case class RenameFunctionEvent(database: String, name: String, newName: String) extends FunctionEvent with Product with Serializable

Event fired after a function has been renamed.
case class RenameFunctionPreEvent(database: String, name: String, newName: String) extends FunctionEvent with Product with Serializable

Event fired before a function is renamed.
case class RenameTableEvent(database: String, name: String, newName: String) extends TableEvent with Product with Serializable

Event fired after a table has been renamed.
case class RenameTablePreEvent(database: String, name: String, newName: String) extends TableEvent with Product with Serializable

Event fired before a table is renamed.
class SessionCatalog extends Logging

An internal catalog that is used by a Spark Session.
An internal catalog that is used by a Spark Session. This internal catalog serves as a proxy to the underlying metastore (e.g. Hive Metastore) and it also manages temporary tables and functions of the Spark Session that it belongs to.
This class must be thread-safe.
trait TableEvent extends DatabaseEvent

Event fired when a table is created, dropped or renamed.
case class UnresolvedCatalogRelation(tableMeta: CatalogTable) extends LeafNode with Product with Serializable

A placeholder for a table relation, which will be replaced by a concrete relation such as LogicalRelation or HiveTableRelation during analysis.

Value Members

object ArchiveResource extends FunctionResourceType
object CatalogStorageFormat extends Serializable
object CatalogTable extends Serializable
object CatalogTableType extends Serializable
object CatalogTypes
object CatalogUtils
object DummyFunctionResourceLoader extends FunctionResourceLoader
object ExternalCatalogUtils
object FileResource extends FunctionResourceType
object FunctionResourceType
object JarResource extends FunctionResourceType
object SessionCatalog

package catalog

Type Members

case class BucketSpec(numBuckets: Int, bucketColumnNames: Seq[String], sortColumnNames: Seq[String]) extends Product with Serializable

case class CatalogDatabase(name: String, description: String, locationUri: URI, properties: Map[String, String]) extends Product with Serializable

case class CatalogFunction(identifier: FunctionIdentifier, className: String, resources: Seq[FunctionResource]) extends Product with Serializable

case class CatalogStatistics(sizeInBytes: BigInt, rowCount: Option[BigInt] = None, colStats: Map[String, ColumnStat] = Map.empty) extends Product with Serializable

case class CatalogStorageFormat(locationUri: Option[URI], inputFormat: Option[String], outputFormat: Option[String], serde: Option[String], compressed: Boolean, properties: Map[String, String]) extends Product with Serializable

case class CatalogTablePartition(spec: TablePartitionSpec, storage: CatalogStorageFormat, parameters: Map[String, String] = Map.empty) extends Product with Serializable

case class CatalogTableType extends Product with Serializable

case class CreateDatabaseEvent(database: String) extends DatabaseEvent with Product with Serializable

case class CreateDatabasePreEvent(database: String) extends DatabaseEvent with Product with Serializable

case class CreateFunctionEvent(database: String, name: String) extends FunctionEvent with Product with Serializable

case class CreateFunctionPreEvent(database: String, name: String) extends FunctionEvent with Product with Serializable

case class CreateTableEvent(database: String, name: String) extends TableEvent with Product with Serializable

case class CreateTablePreEvent(database: String, name: String) extends TableEvent with Product with Serializable

trait DatabaseEvent extends ExternalCatalogEvent

case class DropDatabaseEvent(database: String) extends DatabaseEvent with Product with Serializable

case class DropDatabasePreEvent(database: String) extends DatabaseEvent with Product with Serializable

case class DropFunctionEvent(database: String, name: String) extends FunctionEvent with Product with Serializable

case class DropFunctionPreEvent(database: String, name: String) extends FunctionEvent with Product with Serializable

case class DropTableEvent(database: String, name: String) extends TableEvent with Product with Serializable

case class DropTablePreEvent(database: String, name: String) extends TableEvent with Product with Serializable

abstract class ExternalCatalog extends ListenerBus[ExternalCatalogEventListener, ExternalCatalogEvent]

trait ExternalCatalogEvent extends SparkListenerEvent

trait ExternalCatalogEventListener extends AnyRef

trait FunctionEvent extends DatabaseEvent

case class FunctionResource(resourceType: FunctionResourceType, uri: String) extends Product with Serializable

trait FunctionResourceLoader extends AnyRef

abstract class FunctionResourceType extends AnyRef

class GlobalTempViewManager extends AnyRef

case class HiveTableRelation(tableMeta: CatalogTable, dataCols: Seq[AttributeReference], partitionCols: Seq[AttributeReference]) extends LeafNode with MultiInstanceRelation with Product with Serializable

class InMemoryCatalog extends ExternalCatalog

case class RenameFunctionEvent(database: String, name: String, newName: String) extends FunctionEvent with Product with Serializable

case class RenameFunctionPreEvent(database: String, name: String, newName: String) extends FunctionEvent with Product with Serializable

case class RenameTableEvent(database: String, name: String, newName: String) extends TableEvent with Product with Serializable

case class RenameTablePreEvent(database: String, name: String, newName: String) extends TableEvent with Product with Serializable

class SessionCatalog extends Logging

trait TableEvent extends DatabaseEvent

case class UnresolvedCatalogRelation(tableMeta: CatalogTable) extends LeafNode with Product with Serializable

Value Members

object ArchiveResource extends FunctionResourceType

object CatalogStorageFormat extends Serializable

object CatalogTable extends Serializable

object CatalogTableType extends Serializable

object CatalogTypes

object CatalogUtils

object DummyFunctionResourceLoader extends FunctionResourceLoader

object ExternalCatalogUtils

object FileResource extends FunctionResourceType

object FunctionResourceType

object JarResource extends FunctionResourceType

object SessionCatalog

Ungrouped