DataFrameWriter

final class DataFrameWriter[T] extends sql.DataFrameWriter[T]

Interface used to write a Dataset to external storage systems (e.g. file systems, key-value stores). Use Dataset.write to access this.

Annotations: @Stable()
Since: 1.4.0

Linear Supertypes

sql.DataFrameWriter[T], AnyRef, Any

Ordering

Alphabetic
By Inheritance

Inherited

DataFrameWriter
DataFrameWriter
AnyRef
Any

Hide All
Show All

Visibility

Public
Protected

Value Members

final def !=(arg0: Any): Boolean
Definition Classes
AnyRef → Any
final def ##: Int
Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean
Definition Classes
AnyRef → Any
final def asInstanceOf[T0]: T0
Definition Classes
Any
def assertNotBucketed(operation: String): Unit
Attributes
protected
Definition Classes
DataFrameWriter
def assertNotClustered(operation: String): Unit
Attributes
protected
Definition Classes
DataFrameWriter
def assertNotPartitioned(operation: String): Unit
Attributes
protected
Definition Classes
DataFrameWriter
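def bucketBy(numBuckets: Int, colName: String, colNames: String*): DataFrameWriter.this.type
Buckets the output by the given columns. If specified, the output is laid out on the file system similar to Hive's bucketing scheme, but with a different bucket hash function, and is not compatible with Hive's bucketing.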
Definition Classes
DataFrameWriter → DataFrameWriter
Annotations
@varargs()
var bucketColumnNames: Option[Seq[String]]
Attributes
protected
Definition Classes
DataFrameWriter
def clone(): AnyRef
Attributes
protected[lang]
Definition Classes
AnyRef
Annotations
@throws(classOf[java.lang.CloneNotSupportedException]) @IntrinsicCandidate() @native()
def clusterBy(colName: String, colNames: String*): DataFrameWriter.this.type
Clusters the output by the given columns on the storage, so that rows with matching values in the clustering columns are stored together.
Definition Classes
DataFrameWriter → DataFrameWriter
Annotations
@varargs()
var clusteringColumns: Option[Seq[String]]
Attributes
protected
Definition Classes
DataFrameWriter
def csv(path: String): Unit
Definition Classes
DataFrameWriter
var curmode: SaveMode
Attributes
protected
Definition Classes
DataFrameWriter
final def eq(arg0: AnyRef): Boolean
Definition Classes
AnyRef
def equals(arg0: AnyRef): Boolean
Definition Classes
AnyRef → Any
var extraOptions: CaseInsensitiveMap[String]
Attributes
protected
Definition Classes
DataFrameWriter
def format(source: String): DataFrameWriter.this.type
Specifies the underlying output data source. Built-in options include "parquet", "json", etc.
Definition Classes
DataFrameWriter → DataFrameWriter
final def getClass(): Class[_ <: AnyRef]
Definition Classes
AnyRef → Any
Annotations
@IntrinsicCandidate() @native()
def hashCode(): Int
Definition Classes
AnyRef → Any
Annotations
@IntrinsicCandidate() @native()
def insertInto(tableName: String): Unit
Inserts the content of the DataFrame into the specified table. It requires that the schema of the DataFrame is the same as the schema of the table.
Definition Classes
DataFrameWriter → DataFrameWriter
Since
1.4.0
Note
Unlike saveAsTable, insertInto ignores the column names and just uses position-based resolution. For example:
```
scala> Seq((1, 2)).toDF("i", "j").write.mode("overwrite").saveAsTable("t1")
scala> Seq((3, 4)).toDF("j", "i").write.insertInto("t1")
scala> Seq((5, 6)).toDF("a", "b").write.insertInto("t1")
scala> sql("select * from t1").show
+---+---+
|  i|  j|
+---+---+
|  5|  6|
|  3|  4|
|  1|  2|
+---+---+
```
Because it inserts data to an existing table, format or options will be ignored.
SaveMode.ErrorIfExists and SaveMode.Ignore behave as SaveMode.Append in insertInto, because insertInto is not a table-creating operation.
def isBucketed(): Boolean
Attributes
protected
Definition Classes
DataFrameWriter
final def isInstanceOf[T0]: Boolean
Definition Classes
Any
def jdbc(url: String, table: String, connectionProperties: Properties): Unit
Definition Classes
DataFrameWriter
def json(path: String): Unit
Definition Classes
DataFrameWriter
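def mode(saveMode: String): DataFrameWriter.this.type
Specifies the behavior when data or table already exists. Options include "overwrite" (overwrite the existing data), "append" (append the data), "ignore" (ignore the operation, i.e. no-op), and "error" or "errorifexists" (the default: throw an exception at runtime).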
Definition Classes
DataFrameWriter → DataFrameWriter
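def mode(saveMode: SaveMode): DataFrameWriter.this.type
Specifies the behavior when data or table already exists. Options include SaveMode.Overwrite (overwrite the existing data), SaveMode.Append (append the data), SaveMode.Ignore (ignore the operation, i.e. no-op), and SaveMode.ErrorIfExists (the default: throw an exception at runtime).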
Definition Classes
DataFrameWriter → DataFrameWriter
final def ne(arg0: AnyRef): Boolean
Definition Classes
AnyRef
final def notify(): Unit
Definition Classes
AnyRef
Annotations
@IntrinsicCandidate() @native()
final def notifyAll(): Unit
Definition Classes
AnyRef
Annotations
@IntrinsicCandidate() @native()
var numBuckets: Option[Int]
Attributes
protected
Definition Classes
DataFrameWriter
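def option(key: String, value: Double): DataFrameWriter.this.type
Adds an output option for the underlying data source. All options are maintained in a case-insensitive way in terms of key names. If a new option has the same key case-insensitively, it will override the existing option.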
Definition Classes
DataFrameWriter → DataFrameWriter
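def option(key: String, value: Long): DataFrameWriter.this.type
Adds an output option for the underlying data source. All options are maintained in a case-insensitive way in terms of key names. If a new option has the same key case-insensitively, it will override the existing option.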
Definition Classes
DataFrameWriter → DataFrameWriter
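def option(key: String, value: Boolean): DataFrameWriter.this.type
Adds an output option for the underlying data source. All options are maintained in a case-insensitive way in terms of key names. If a new option has the same key case-insensitively, it will override the existing option.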
Definition Classes
DataFrameWriter → DataFrameWriter
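def option(key: String, value: String): DataFrameWriter.this.type
Adds an output option for the underlying data source. All options are maintained in a case-insensitive way in terms of key names. If a new option has the same key case-insensitively, it will override the existing option.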
Definition Classes
DataFrameWriter → DataFrameWriter
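def options(options: java.util.Map[String, String]): DataFrameWriter.this.type
Adds output options for the underlying data source (overload taking a java.util.Map). All options are maintained in a case-insensitive way in terms of key names. If a new option has the same key case-insensitively, it will override the existing option.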
Definition Classes
DataFrameWriter → DataFrameWriter
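def options(options: scala.collection.Map[String, String]): DataFrameWriter.this.type
(Scala-specific) Adds output options for the underlying data source. All options are maintained in a case-insensitive way in terms of key names. If a new option has the same key case-insensitively, it will override the existing option.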
Definition Classes
DataFrameWriter → DataFrameWriter
def orc(path: String): Unit
Definition Classes
DataFrameWriter
def parquet(path: String): Unit
Definition Classes
DataFrameWriter
def partitionBy(colNames: String*): DataFrameWriter.this.type
Partitions the output by the given columns on the file system. If specified, the output is laid out on the file system similar to Hive's partitioning scheme.
Definition Classes
DataFrameWriter → DataFrameWriter
Annotations
@varargs()
var partitioningColumns: Option[Seq[String]]
Attributes
protected
Definition Classes
DataFrameWriter
def save(): Unit
Saves the content of the DataFrame using the data source format, options, and save mode configured on this writer.
Definition Classes
DataFrameWriter → DataFrameWriter
Since
1.4.0
def save(path: String): Unit
Saves the content of the DataFrame at the specified path.
Definition Classes
DataFrameWriter → DataFrameWriter
Since
1.4.0
def saveAsTable(tableName: String): Unit
Saves the content of the DataFrame as the specified table.
If the table already exists, the behavior of this function depends on the save mode, specified by the mode function (defaults to throwing an exception). When mode is Overwrite, the schema of the DataFrame does not need to be the same as that of the existing table.
When mode is Append, if there is an existing table, we will use the format and options of the existing table. The column order in the schema of the DataFrame doesn't need to be the same as that of the existing table. Unlike insertInto, saveAsTable will use the column names to find the correct column positions. For example:
```
scala> Seq((1, 2)).toDF("i", "j").write.mode("overwrite").saveAsTable("t1")
scala> Seq((3, 4)).toDF("j", "i").write.mode("append").saveAsTable("t1")
scala> sql("select * from t1").show
+---+---+
|  i|  j|
+---+---+
|  1|  2|
|  4|  3|
+---+---+
```
In this method, the save mode is used to determine the behavior if the data source table exists in the Spark catalog. We will always overwrite the underlying data of the data source (e.g. a table in a JDBC data source) if the table doesn't exist in the Spark catalog, and will always append to the underlying data of the data source if the table already exists.
When the DataFrame is created from a non-partitioned HadoopFsRelation with a single input path, and the data source provider can be mapped to an existing Hive built-in SerDe (i.e. ORC and Parquet), the table is persisted in a Hive-compatible format, which means other systems like Hive will be able to read this table. Otherwise, the table is persisted in a Spark SQL-specific format.
Definition Classes
DataFrameWriter → DataFrameWriter
Since
1.4.0
def sortBy(colName: String, colNames: String*): DataFrameWriter.this.type
Sorts the output in each bucket by the given columns.
Definition Classes
DataFrameWriter → DataFrameWriter
Annotations
@varargs()
var sortColumnNames: Option[Seq[String]]
Attributes
protected
Definition Classes
DataFrameWriter
var source: String
Attributes
protected
Definition Classes
DataFrameWriter
final def synchronized[T0](arg0: => T0): T0
Definition Classes
AnyRef
def text(path: String): Unit
Definition Classes
DataFrameWriter
def toString(): String
Definition Classes
AnyRef → Any
def validatePartitioning(): Unit
Attributes
protected
Definition Classes
DataFrameWriter
final def wait(arg0: Long, arg1: Int): Unit
Definition Classes
AnyRef
Annotations
@throws(classOf[java.lang.InterruptedException])
final def wait(arg0: Long): Unit
Definition Classes
AnyRef
Annotations
@throws(classOf[java.lang.InterruptedException]) @native()
final def wait(): Unit
Definition Classes
AnyRef
Annotations
@throws(classOf[java.lang.InterruptedException])
def xml(path: String): Unit
Definition Classes
DataFrameWriter

Deprecated Value Members

def finalize(): Unit
Attributes
protected[lang]
Definition Classes
AnyRef
Annotations
@throws(classOf[java.lang.Throwable]) @Deprecated
Deprecated
(Since version 9)

Packages

DataFrameWriter

final class DataFrameWriter[T] extends sql.DataFrameWriter[T]

Value Members

Deprecated Value Members

Inherited from sql.DataFrameWriter[T]

Inherited from AnyRef

Inherited from Any

Ungrouped

Packages

DataFrameWriter

final class DataFrameWriter[T] extends sql.DataFrameWriter[T]

Value Members

Deprecated Value Members

Inherited from sql.DataFrameWriter[T]

Inherited from AnyRef

Inherited from Any

Ungrouped

DataFrameWriter