Creates a SparkSession.Builder.
See UnderlyingSparkSession.builder for more information.
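A typical way to obtain a SparkSession is through the builder; a minimal sketch (the application name and master URL below are placeholders):

```scala
import org.apache.spark.sql.SparkSession

// Build (or reuse) a session; getOrCreate returns the active session if one exists.
val spark = SparkSession.builder
  .appName("ExampleApp")   // hypothetical application name
  .master("local[*]")      // run locally on all cores; omit when submitting to a cluster
  .getOrCreate()
```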
Closes the current SparkSession.
Creates a DataFrame from an RDD containing Rows, using the given schema. It is important to make sure that the structure of every Row of the provided RDD matches the provided schema; otherwise, a runtime exception will be thrown. Example:

```scala
import org.apache.spark.sql._
import org.apache.spark.sql.types._

val sparkSession = new org.apache.spark.sql.SparkSession(sc)

val schema = StructType(
  StructField("name", StringType, false) ::
  StructField("age", IntegerType, true) :: Nil)

val people = sc.textFile("examples/src/main/resources/people.txt")
  .map(_.split(","))
  .map(p => Row(p(0), p(1).trim.toInt))

val dataFrame = sparkSession.createDataFrame(people, schema)
dataFrame.printSchema
// root
// |-- name: string (nullable = false)
// |-- age: integer (nullable = true)

dataFrame.createOrReplaceTempView("people")
sparkSession.sql("select name from people").collect.foreach(println)
```
Since: 2.0.0
Creates a DataFrame from a local Seq of Product.
Since: 2.0.0
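For example, a DataFrame can be built directly from a local Seq of case class instances; a sketch assuming an active session `spark` with its implicits imported:

```scala
import spark.implicits._

// Any Product type works here: a case class or a tuple.
case class Person(name: String, age: Long)

val df = spark.createDataFrame(Seq(Person("Alice", 32), Person("Bob", 41)))
// Column names are derived from the case class fields: name, age
df.printSchema()
```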
Creates a DataFrame from an RDD of Product (e.g. case classes, tuples).
Since: 2.0.0
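A sketch of the RDD variant, assuming `spark` is a SparkSession and `sc` its SparkContext:

```scala
import spark.implicits._

// Tuples are Products, so columns default to _1, _2; toDF renames them.
val rdd = sc.parallelize(Seq(("Alice", 32), ("Bob", 41)))
val df = spark.createDataFrame(rdd).toDF("name", "age")
```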
Creates a Dataset from an RDD of a given type. This method requires an encoder (to convert a JVM object of type T to and from the internal Spark SQL representation) that is generally created automatically through implicits from a SparkSession, or can be created explicitly by calling static methods on Encoders.
Since: 2.0.0
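A sketch, assuming `spark` and its SparkContext `sc` are available; the encoder for the case class is derived through `spark.implicits._`:

```scala
import spark.implicits._

case class Event(id: Long, label: String)

val rdd = sc.parallelize(Seq(Event(1L, "start"), Event(2L, "stop")))
val ds = spark.createDataset(rdd)  // Dataset[Event]; encoder supplied implicitly
```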
Creates a Dataset from a local Seq of data of a given type. This method requires an encoder (to convert a JVM object of type T to and from the internal Spark SQL representation) that is generally created automatically through implicits from a SparkSession, or can be created explicitly by calling static methods on Encoders.

```scala
import spark.implicits._

case class Person(name: String, age: Long)

val data = Seq(Person("Michael", 29), Person("Andy", 30), Person("Justin", 19))
val ds = spark.createDataset(data)
ds.show()
// +-------+---+
// |   name|age|
// +-------+---+
// |Michael| 29|
// |   Andy| 30|
// | Justin| 19|
// +-------+---+
```
Since: 2.0.0
Creates a new Dataset of type T containing zero elements.
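A sketch; `emptyDataset` still needs an encoder for T, typically supplied via implicits:

```scala
import spark.implicits._

val empty = spark.emptyDataset[String]
empty.count()  // 0
```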
Creates a DataFrameReader.
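For example, the reader can load files in various formats into a DataFrame (the path below is a placeholder):

```scala
// Read a JSON file; the schema is inferred unless one is provided.
val df = spark.read.json("path/to/people.json")  // placeholder path
```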
Creates a DataStreamReader.
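A sketch of a streaming read with Structured Streaming (the directory below is a placeholder):

```scala
import org.apache.spark.sql.types._

// Streaming file sources require an explicit schema.
val schema = StructType(StructField("name", StringType, true) :: Nil)
val stream = spark.readStream
  .schema(schema)
  .json("path/to/input-dir")  // placeholder directory watched for new files
```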
Executes a SQL query using Spark.
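For example, after registering a temporary view, a query can be run with `sql`; a sketch assuming a DataFrame `df` already exists:

```scala
df.createOrReplaceTempView("people")  // hypothetical view name
val adults = spark.sql("SELECT name FROM people WHERE age >= 18")
adults.show()
```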