datasources

Type Members

class CatalogFileIndex extends FileIndex

A FileIndex for a metastore catalog table.
case class CreateTable(tableDesc: CatalogTable, mode: SaveMode, query: Option[LogicalPlan]) extends LogicalPlan with Product with Serializable

Create a table and optionally insert some data into it.
case class CreateTempViewUsing(tableIdent: TableIdentifier, userSpecifiedSchema: Option[StructType], replace: Boolean, global: Boolean, provider: String, options: Map[String, String]) extends LeafNode with RunnableCommand with Product with Serializable

Create or replace a local/global temporary view with given data source.
case class DataSource(sparkSession: SparkSession, className: String, paths: Seq[String] = immutable.this.Nil, userSpecifiedSchema: Option[StructType] = scala.None, partitionColumns: Seq[String] = collection.this.Seq.empty[Nothing], bucketSpec: Option[BucketSpec] = scala.None, options: Map[String, String] = ..., catalogTable: Option[CatalogTable] = scala.None) extends Logging with Product with Serializable

The main class responsible for representing a pluggable Data Source in Spark SQL.
case class DataSourceAnalysis(conf: SQLConf) extends Rule[LogicalPlan] with CastSupport with Product with Serializable

Replaces generic operations with specific variants that are designed to work with Spark SQL Data Sources.
case class DataSourceStrategy(conf: SQLConf) extends Strategy with Logging with CastSupport with Product with Serializable

A Strategy for planning scans over data sources defined using the sources API.
class FailureSafeParser[IN] extends AnyRef
trait FileFormat extends AnyRef

Used to read and write data stored in files to/from the InternalRow format.
trait FileIndex extends AnyRef

An interface for objects capable of enumerating the root paths of a relation as well as the partitions of a relation subject to some pruning expressions.
case class FilePartition(index: Int, files: Seq[PartitionedFile]) extends Partition with Product with Serializable

A collection of file blocks that should be read as a single task (possibly from multiple partitioned directories).
class FileScanRDD extends RDD[InternalRow]

An RDD that scans a list of file partitions.
abstract class FileStatusCache extends AnyRef

A cache of the leaf files of partition directories.
class FindDataSourceTable extends Rule[LogicalPlan]

Replaces CatalogRelation with data source table if its table provider is not hive.
class HadoopFileLinesReader extends Iterator[Text] with Closeable

An adaptor from a PartitionedFile to an Iterator of Text, which are all of the lines in that file.
case class HadoopFsRelation(location: FileIndex, partitionSchema: StructType, dataSchema: StructType, bucketSpec: Option[BucketSpec], fileFormat: FileFormat, options: Map[String, String])(sparkSession: SparkSession) extends BaseRelation with FileRelation with Product with Serializable

Acts as a container for all of the metadata required to read from a datasource.
class InMemoryFileIndex extends PartitioningAwareFileIndex

A FileIndex that generates the list of files to process by recursively listing all the files present in paths.
case class InsertIntoDataSourceCommand(logicalRelation: LogicalRelation, query: LogicalPlan, overwrite: Boolean) extends LeafNode with RunnableCommand with Product with Serializable

Inserts the results of query into a relation that extends InsertableRelation.
case class InsertIntoHadoopFsRelationCommand(outputPath: Path, staticPartitions: TablePartitionSpec, ifPartitionNotExists: Boolean, partitionColumns: Seq[String], bucketSpec: Option[BucketSpec], fileFormat: FileFormat, options: Map[String, String], query: LogicalPlan, mode: SaveMode, catalogTable: Option[CatalogTable], fileIndex: Option[FileIndex]) extends LeafNode with RunnableCommand with Product with Serializable

A command for writing data to a HadoopFsRelation.
case class LogicalRelation(relation: BaseRelation, output: Seq[AttributeReference], catalogTable: Option[CatalogTable]) extends LeafNode with MultiInstanceRelation with Product with Serializable

Used to link a BaseRelation into a logical query plan.
abstract class OutputWriter extends AnyRef

OutputWriter is used together with HadoopFsRelation for persisting rows to the underlying file system.
abstract class OutputWriterFactory extends Serializable

A factory that produces OutputWriters.
case class PartitionDirectory(values: InternalRow, files: Seq[FileStatus]) extends Product with Serializable

A collection of data files from a partitioned relation, along with the partition values in the form of an InternalRow.
case class PartitionPath(values: InternalRow, path: Path) extends Product with Serializable

Holds a directory in a partitioned collection of files as well as the partition values in the form of a Row.
case class PartitionSpec(partitionColumns: StructType, partitions: Seq[PartitionPath]) extends Product with Serializable
case class PartitionedFile(partitionValues: InternalRow, filePath: String, start: Long, length: Long, locations: Array[String] = ...) extends Product with Serializable

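A part (i.e. "block") of a single file that should be read, along with partition column values that need to be prepended to each row.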
abstract class PartitioningAwareFileIndex extends FileIndex with Logging

An abstract class that represents FileIndex implementations that are aware of partitioned tables.
case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[LogicalPlan] with Product with Serializable

Preprocess CreateTable, to do some normalization and checking.
case class PreprocessTableInsertion(conf: SQLConf) extends Rule[LogicalPlan] with CastSupport with Product with Serializable

Preprocess the InsertIntoTable plan.
class RecordReaderIterator[T] extends Iterator[T] with Closeable

An adaptor from a Hadoop RecordReader to an Iterator over the values returned.
case class RefreshResource(path: String) extends LeafNode with RunnableCommand with Product with Serializable
case class RefreshTable(tableIdent: TableIdentifier) extends LeafNode with RunnableCommand with Product with Serializable
class ResolveSQLOnFile extends Rule[LogicalPlan]

Tries to replace UnresolvedRelations if the plan is for a direct query on files.
class SQLHadoopMapReduceCommitProtocol extends HadoopMapReduceCommitProtocol with Serializable with Logging

A variant of HadoopMapReduceCommitProtocol that allows specifying the actual Hadoop output committer using an option specified in SQLConf.
case class SaveIntoDataSourceCommand(query: LogicalPlan, provider: String, partitionColumns: Seq[String], options: Map[String, String], mode: SaveMode) extends LeafNode with RunnableCommand with Product with Serializable

Saves the results of query into a data source.
abstract class TextBasedFileFormat extends FileFormat

The base class for file formats that are based on text files.

Value Members

object BucketingUtils
object CodecStreams
object DataSource extends Logging with Serializable
object DataSourceStrategy extends Serializable
object FileFormatWriter extends Logging

A helper object for writing FileFormat data out to a location.
object FileSourceStrategy extends Strategy with Logging

A strategy for planning scans over collections of files that might be partitioned or bucketed by user specified columns.
object FileStatusCache

Use FileStatusCache.getOrCreate() to construct a globally shared file status cache.
object HiveOnlyCheck extends (LogicalPlan) ⇒ Unit

A rule to check whether the functions are supported only when Hive support is enabled.
object InMemoryFileIndex extends Logging
object LogicalRelation extends Serializable
object NoopCache extends FileStatusCache

A non-caching implementation used when partition file status caching is disabled.
object PartitionPath extends Serializable
object PartitionSpec extends Serializable
object PartitioningAwareFileIndex
object PartitioningUtils
object PreWriteCheck extends (LogicalPlan) ⇒ Unit

A rule to do various checks before inserting into or writing to a data source table.
package csv
package jdbc
package json
package parquet
package text

package datasources

Type Members

class CatalogFileIndex extends FileIndex

case class CreateTable(tableDesc: CatalogTable, mode: SaveMode, query: Option[LogicalPlan]) extends LogicalPlan with Product with Serializable

case class CreateTempViewUsing(tableIdent: TableIdentifier, userSpecifiedSchema: Option[StructType], replace: Boolean, global: Boolean, provider: String, options: Map[String, String]) extends LeafNode with RunnableCommand with Product with Serializable

case class DataSourceAnalysis(conf: SQLConf) extends Rule[LogicalPlan] with CastSupport with Product with Serializable

case class DataSourceStrategy(conf: SQLConf) extends Strategy with Logging with CastSupport with Product with Serializable

class FailureSafeParser[IN] extends AnyRef

trait FileFormat extends AnyRef

trait FileIndex extends AnyRef

case class FilePartition(index: Int, files: Seq[PartitionedFile]) extends Partition with Product with Serializable

class FileScanRDD extends RDD[InternalRow]

abstract class FileStatusCache extends AnyRef

class FindDataSourceTable extends Rule[LogicalPlan]

class HadoopFileLinesReader extends Iterator[Text] with Closeable

case class HadoopFsRelation(location: FileIndex, partitionSchema: StructType, dataSchema: StructType, bucketSpec: Option[BucketSpec], fileFormat: FileFormat, options: Map[String, String])(sparkSession: SparkSession) extends BaseRelation with FileRelation with Product with Serializable

class InMemoryFileIndex extends PartitioningAwareFileIndex

case class InsertIntoDataSourceCommand(logicalRelation: LogicalRelation, query: LogicalPlan, overwrite: Boolean) extends LeafNode with RunnableCommand with Product with Serializable

case class LogicalRelation(relation: BaseRelation, output: Seq[AttributeReference], catalogTable: Option[CatalogTable]) extends LeafNode with MultiInstanceRelation with Product with Serializable

abstract class OutputWriter extends AnyRef

abstract class OutputWriterFactory extends Serializable

case class PartitionDirectory(values: InternalRow, files: Seq[FileStatus]) extends Product with Serializable

case class PartitionPath(values: InternalRow, path: Path) extends Product with Serializable

case class PartitionSpec(partitionColumns: StructType, partitions: Seq[PartitionPath]) extends Product with Serializable

case class PartitionedFile(partitionValues: InternalRow, filePath: String, start: Long, length: Long, locations: Array[String] = ...) extends Product with Serializable

abstract class PartitioningAwareFileIndex extends FileIndex with Logging

case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[LogicalPlan] with Product with Serializable

case class PreprocessTableInsertion(conf: SQLConf) extends Rule[LogicalPlan] with CastSupport with Product with Serializable

class RecordReaderIterator[T] extends Iterator[T] with Closeable

case class RefreshResource(path: String) extends LeafNode with RunnableCommand with Product with Serializable

case class RefreshTable(tableIdent: TableIdentifier) extends LeafNode with RunnableCommand with Product with Serializable

class ResolveSQLOnFile extends Rule[LogicalPlan]

class SQLHadoopMapReduceCommitProtocol extends HadoopMapReduceCommitProtocol with Serializable with Logging

case class SaveIntoDataSourceCommand(query: LogicalPlan, provider: String, partitionColumns: Seq[String], options: Map[String, String], mode: SaveMode) extends LeafNode with RunnableCommand with Product with Serializable

abstract class TextBasedFileFormat extends FileFormat

Value Members

object BucketingUtils

object CodecStreams

object DataSource extends Logging with Serializable

object DataSourceStrategy extends Serializable

object FileFormatWriter extends Logging

object FileSourceStrategy extends Strategy with Logging

object FileStatusCache

object HiveOnlyCheck extends (LogicalPlan) ⇒ Unit

object InMemoryFileIndex extends Logging

object LogicalRelation extends Serializable

object NoopCache extends FileStatusCache

object PartitionPath extends Serializable

object PartitionSpec extends Serializable

object PartitioningAwareFileIndex

object PartitioningUtils

object PreWriteCheck extends (LogicalPlan) ⇒ Unit

package csv

package jdbc

package json

package parquet

package text

Ungrouped