org.apache.spark.sql.execution

package datasources

Type Members

  1. case class AnalyzeCreateTableAsSelect(sparkSession: SparkSession) extends Rule[LogicalPlan] with Product with Serializable

    Analyzes the query in CREATE TABLE AS SELECT (CTAS).

  2. class CaseInsensitiveMap extends Map[String, String] with Serializable

    Builds a map in which keys are case-insensitive (see the CaseInsensitiveMap sketch after this list).

  3. case class CreateTableUsing(tableIdent: TableIdentifier, userSpecifiedSchema: Option[StructType], provider: String, temporary: Boolean, options: Map[String, String], partitionColumns: Array[String], bucketSpec: Option[BucketSpec], allowExisting: Boolean, managedIfNoPath: Boolean) extends LeafNode with Command with Product with Serializable

    Represents the operation of creating a table using a data source.

  4. case class CreateTableUsingAsSelect(tableIdent: TableIdentifier, provider: String, partitionColumns: Array[String], bucketSpec: Option[BucketSpec], mode: SaveMode, options: Map[String, String], query: LogicalPlan) extends LeafNode with Command with Product with Serializable

    A node used to support CTAS statements and saveAsTable for the data source API.

  5. case class CreateTempViewUsing(tableIdent: TableIdentifier, userSpecifiedSchema: Option[StructType], replace: Boolean, provider: String, options: Map[String, String]) extends LeafNode with RunnableCommand with Product with Serializable

  6. case class DataSource(sparkSession: SparkSession, className: String, paths: Seq[String] = Nil, userSpecifiedSchema: Option[StructType] = None, partitionColumns: Seq[String] = Seq.empty, bucketSpec: Option[BucketSpec] = None, options: Map[String, String] = ...) extends Logging with Product with Serializable

    The main class responsible for representing a pluggable Data Source in Spark SQL (see the DataSource sketch after this list).

  7. case class DataSourceAnalysis(conf: CatalystConf) extends Rule[LogicalPlan] with Product with Serializable

    Replaces generic operations with specific variants that are designed to work with Spark SQL Data Sources.

  8. trait FileCatalog extends AnyRef

    An interface for objects capable of enumerating the files that comprise a relation as well as the partitioning characteristics of those files.

  9. trait FileFormat extends AnyRef

    Used to read and write data stored in files to/from the InternalRow format.

  10. case class FilePartition(index: Int, files: Seq[PartitionedFile]) extends spark.Partition with Product with Serializable

    A collection of files that should be read as a single task, possibly from multiple partitioned directories (see the file-scan sketch after this list).

  11. class FileScanRDD extends RDD[InternalRow]

  12. class FindDataSourceTable extends Rule[LogicalPlan]

    Replaces SimpleCatalogRelation with a data source table if its table properties contain data source information.

  13. class HadoopFileLinesReader extends Iterator[Text] with Closeable

    An adaptor from a PartitionedFile to an Iterator of Text, where each Text value is one line of that file (see the file-scan sketch after this list).

  14. case class HadoopFsRelation(location: FileCatalog, partitionSchema: StructType, dataSchema: StructType, bucketSpec: Option[BucketSpec], fileFormat: FileFormat, options: Map[String, String])(sparkSession: SparkSession) extends BaseRelation with FileRelation with Product with Serializable

    Acts as a container for all of the metadata required to read from a data source.

  15. case class InsertIntoDataSourceCommand(logicalRelation: LogicalRelation, query: LogicalPlan, overwrite: Boolean) extends LeafNode with RunnableCommand with Product with Serializable

    Inserts the results of the query into a relation that extends InsertableRelation.

  16. case class InsertIntoHadoopFsRelationCommand(outputPath: Path, partitionColumns: Seq[Attribute], bucketSpec: Option[BucketSpec], fileFormat: FileFormat, refreshFunction: () ⇒ Unit, options: Map[String, String], query: LogicalPlan, mode: SaveMode) extends LeafNode with RunnableCommand with Product with Serializable

    A command for writing data to a HadoopFsRelation.

  17. class ListingFileCatalog extends PartitioningAwareFileCatalog

    A FileCatalog that generates the list of files to process by recursively listing all the files present in paths.

  18. case class LogicalRelation(relation: BaseRelation, expectedOutputAttributes: Option[Seq[Attribute]] = None, metastoreTableIdentifier: Option[TableIdentifier] = None) extends LeafNode with MultiInstanceRelation with Product with Serializable

    Used to link a BaseRelation into a logical query plan.

  19. abstract class OutputWriter extends AnyRef

    ::Experimental:: OutputWriter is used together with HadoopFsRelation for persisting rows to the underlying file system.

  20. abstract class OutputWriterFactory extends Serializable

    ::Experimental:: A factory that produces OutputWriters.

  21. case class Partition(values: InternalRow, files: Seq[FileStatus]) extends Product with Serializable

    A collection of data files from a partitioned relation, along with the partition values in the form of an InternalRow.

  22. case class PartitionDirectory(values: InternalRow, path: Path) extends Product with Serializable

    Holds a directory in a partitioned collection of files as well as the partition values in the form of a Row.

  23. case class PartitionSpec(partitionColumns: StructType, partitions: Seq[PartitionDirectory]) extends Product with Serializable

  24. case class PartitionedFile(partitionValues: InternalRow, filePath: String, start: Long, length: Long, locations: Array[String] = ...) extends Product with Serializable

    A single file that should be read, along with partition column values that need to be prepended to each row.

  25. abstract class PartitioningAwareFileCatalog extends FileCatalog with Logging

    An abstract class that represents FileCatalogs that are aware of partitioned tables.

  26. case class PreWriteCheck(conf: SQLConf, catalog: SessionCatalog) extends (LogicalPlan) ⇒ Unit with Product with Serializable

    A rule to do various checks before inserting into or writing to a data source table.

  27. case class PreprocessTableInsertion(conf: SQLConf) extends Rule[LogicalPlan] with Product with Serializable

    Preprocesses the InsertIntoTable plan.

  28. class RecordReaderIterator[T] extends Iterator[T] with Closeable

    An adaptor from a Hadoop RecordReader to an Iterator over the values returned.

  29. case class RefreshResource(path: String) extends LeafNode with RunnableCommand with Product with Serializable

  30. case class RefreshTable(tableIdent: TableIdentifier) extends LeafNode with RunnableCommand with Product with Serializable

  31. class ResolveDataSource extends Rule[LogicalPlan]

    Tries to replace UnresolvedRelations with relations resolved through DataSource.

  32. abstract class TextBasedFileFormat extends FileFormat

    The base class for file formats that are based on text files.
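
A minimal sketch of how CaseInsensitiveMap behaves, assuming its one-argument constructor taking a Map[String, String] (this is an internal API and may change between releases):

    import org.apache.spark.sql.execution.datasources.CaseInsensitiveMap

    // Lookups succeed regardless of the casing used by the caller.
    val options = new CaseInsensitiveMap(Map("Header" -> "true", "DELIMITER" -> ","))

    assert(options("header") == "true")            // key matched case-insensitively
    assert(options.get("delimiter").contains(","))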
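
A minimal sketch of resolving a pluggable source through DataSource and linking the result into a logical plan with LogicalRelation; the path is hypothetical and the resolveRelation() method is an assumption about this internal class:

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.execution.datasources.{DataSource, LogicalRelation}

    val spark = SparkSession.builder().master("local[*]").appName("sketch").getOrCreate()

    // Resolve a provider (short name or fully qualified class name) to a BaseRelation.
    val ds = DataSource(
      sparkSession = spark,
      className    = "parquet",                    // provider name
      paths        = Seq("/tmp/example.parquet"))  // hypothetical path

    val relation = ds.resolveRelation()            // assumed resolver method
    val plan     = LogicalRelation(relation)       // link the relation into a query plan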
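
A minimal file-scan sketch: two PartitionedFile splits of the same file grouped into one FilePartition read task, then HadoopFileLinesReader iterating one split line by line. The path is hypothetical, and the reader's two-argument constructor (a PartitionedFile plus a Hadoop Configuration) is an assumption:

    import org.apache.hadoop.conf.Configuration
    import org.apache.spark.sql.catalyst.InternalRow
    import org.apache.spark.sql.execution.datasources.{FilePartition, HadoopFileLinesReader, PartitionedFile}

    // Two 64 MB splits; partition values are empty because the file does not
    // live under a partitioned directory.
    val mb = 1024L * 1024L
    val split0 = PartitionedFile(InternalRow.empty, "/tmp/data/part-00000", start = 0L, length = 64 * mb)
    val split1 = PartitionedFile(InternalRow.empty, "/tmp/data/part-00000", start = 64 * mb, length = 64 * mb)

    // A FilePartition is the unit of work handed to one scan task.
    val task = FilePartition(index = 0, files = Seq(split0, split1))

    // Read the first split as Hadoop Text values, one per line.
    val reader = new HadoopFileLinesReader(split0, new Configuration())
    try reader.foreach(line => println(line.toString))
    finally reader.close()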

Value Members

  1. object BucketingUtils

  2. object DataSourceStrategy extends Strategy with Logging

    A Strategy for planning scans over data sources defined using the sources API.

  3. object FileSourceStrategy extends Strategy with Logging

    A strategy for planning scans over collections of files that might be partitioned or bucketed by user-specified columns (see the sketch after this list).

  4. object HadoopFsRelation extends Logging with Serializable

    Helper methods for gathering metadata from HDFS.

  5. object PartitionDirectory extends Serializable

  6. object PartitionSpec extends Serializable

  7. object PartitioningUtils

  8. package csv

  9. package jdbc

  10. package json

  11. package parquet

  12. package text
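
Because a Strategy is a function from a logical plan to candidate physical plans, FileSourceStrategy can be applied by hand to inspect the scan it would plan. A minimal sketch, assuming a local session and a throwaway path:

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.execution.datasources.FileSourceStrategy

    val spark = SparkSession.builder().master("local[*]").appName("sketch").getOrCreate()
    spark.range(100).write.mode("overwrite").parquet("/tmp/strategy-demo")

    // Applying the strategy yields a Seq[SparkPlan] of candidate physical
    // plans (empty if the strategy does not apply to this plan).
    val optimized = spark.read.parquet("/tmp/strategy-demo")
      .filter("id > 50")
      .queryExecution.optimizedPlan

    FileSourceStrategy(optimized).foreach(p => println(p.treeString))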
