SchemaRDD

Instance Constructors

new SchemaRDD(sqlContext: SQLContext, baseLogicalPlan: LogicalPlan)

Value Members

final def !=(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def !=(arg0: Any): Boolean

Definition Classes
Any
final def ##(): Int

Definition Classes
AnyRef → Any
def ++(other: RDD[Row]): RDD[Row]

Definition Classes
RDD
final def ==(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def ==(arg0: Any): Boolean

Definition Classes
Any
def aggregate(aggregateExprs: Expression*): SchemaRDD

Performs an aggregation over all Rows in this RDD.
Performs an aggregation over all Rows in this RDD. This is equivalent to a groupBy with no grouping expressions.
```
schemaRDD.aggregate(Sum('sales) as 'totalSales)
```
def aggregate[U](zeroValue: U)(seqOp: (U, Row) ⇒ U, combOp: (U, U) ⇒ U)(implicit arg0: ClassTag[U]): U

Definition Classes
RDD
def as(alias: Symbol): SchemaRDD

Applies a qualifier to the attributes of this relation.
Applies a qualifier to the attributes of this relation. Can be used to disambiguate attributes with the same name, for example, when performing self-joins.
```
val x = schemaRDD.where('a === 1).as('x)
val y = schemaRDD.where('a === 2).as('y)
x.join(y).where("x.a".attr === "y.a".attr)
```
final def asInstanceOf[T0]: T0

Definition Classes
Any
val baseLogicalPlan: LogicalPlan

Definition Classes
SchemaRDD → SchemaRDDLike
def baseSchemaRDD: SchemaRDD

Definition Classes
SchemaRDD → SchemaRDDLike
def cache(): SchemaRDD.this.type

Overridden cache function will always use the in-memory columnar caching.
Overridden cache function will always use the in-memory columnar caching.

Definition Classes
SchemaRDD → RDD
def cartesian[U](other: RDD[U])(implicit arg0: ClassTag[U]): RDD[(Row, U)]

Definition Classes
RDD
def checkpoint(): Unit

Definition Classes
RDD
def clearDependencies(): Unit

Attributes
protected
Definition Classes
RDD
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
def coalesce(numPartitions: Int, shuffle: Boolean = false)(implicit ord: Ordering[Row] = null): SchemaRDD

Definition Classes
SchemaRDD → RDD
def collect(): Array[Row]

Definition Classes
SchemaRDD → RDD
def collect[U](f: PartialFunction[Row, U])(implicit arg0: ClassTag[U]): RDD[U]

Definition Classes
RDD
def compute(split: Partition, context: TaskContext): Iterator[Row]

Definition Classes
SchemaRDD → RDD
def context: SparkContext

Definition Classes
RDD
def count(): Long

:: Experimental :: Return the number of elements in the RDD.
:: Experimental :: Return the number of elements in the RDD. Unlike the base RDD implementation of count, this implementation leverages the query optimizer to compute the count on the SchemaRDD, which supports features such as filter pushdown.

Definition Classes
SchemaRDD → RDD
Annotations
@Experimental()
def countApprox(timeout: Long, confidence: Double): PartialResult[BoundedDouble]

Definition Classes
RDD
Annotations
@Experimental()
def countApproxDistinct(relativeSD: Double): Long

Definition Classes
RDD
def countApproxDistinct(p: Int, sp: Int): Long

Definition Classes
RDD
Annotations
@Experimental()
def countByValue()(implicit ord: Ordering[Row]): Map[Row, Long]

Definition Classes
RDD
def countByValueApprox(timeout: Long, confidence: Double)(implicit ord: Ordering[Row]): PartialResult[Map[Row, BoundedDouble]]

Definition Classes
RDD
Annotations
@Experimental()
final def dependencies: Seq[Dependency[_]]

Definition Classes
RDD
def distinct(numPartitions: Int)(implicit ord: Ordering[Row] = null): SchemaRDD

Definition Classes
SchemaRDD → RDD
def distinct(): SchemaRDD

Definition Classes
SchemaRDD → RDD
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def except(otherPlan: SchemaRDD): SchemaRDD

Performs a relational except on two SchemaRDDs
Performs a relational except on two SchemaRDDs
otherPlan
the SchemaRDD that should be excepted from this one.
def filter(f: (Row) ⇒ Boolean): SchemaRDD

Definition Classes
SchemaRDD → RDD
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
def first(): Row

Definition Classes
RDD
def firstParent[U](implicit arg0: ClassTag[U]): RDD[U]

Attributes
protected[org.apache.spark]
Definition Classes
RDD
def flatMap[U](f: (Row) ⇒ TraversableOnce[U])(implicit arg0: ClassTag[U]): RDD[U]

Definition Classes
RDD
def fold(zeroValue: Row)(op: (Row, Row) ⇒ Row): Row

Definition Classes
RDD
def foreach(f: (Row) ⇒ Unit): Unit

Definition Classes
RDD
def foreachPartition(f: (Iterator[Row]) ⇒ Unit): Unit

Definition Classes
RDD
def generate(generator: Generator, join: Boolean = false, outer: Boolean = false, alias: Option[String] = None): SchemaRDD

:: Experimental :: Applies the given Generator, or table generating function, to this relation.
:: Experimental :: Applies the given Generator, or table generating function, to this relation.
generator
A table generating function. The API for such functions is likely to change in future releases.
join
when set to true, each output row of the generator is joined with the input row that produced it.
outer
when set to true, at least one row will be produced for each input row, similar to an OUTER JOIN in SQL. When no output rows are produced by the generator for a given row, a single row will be output, with NULL values for each of the generated columns.
alias
an optional alias that can be used as qualifier for the attributes that are produced by this generate operation.

Annotations
@Experimental()
def getCheckpointFile: Option[String]

Definition Classes
RDD
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def getDependencies: Seq[Dependency[_]]

Attributes
protected
Definition Classes
SchemaRDD → RDD
def getPartitions: Array[Partition]

Definition Classes
SchemaRDD → RDD
def getPreferredLocations(split: Partition): Seq[String]

Attributes
protected
Definition Classes
RDD
def getStorageLevel: StorageLevel

Definition Classes
RDD
def glom(): RDD[Array[Row]]

Definition Classes
RDD
def groupBy(groupingExprs: Expression*)(aggregateExprs: Expression*): SchemaRDD

Performs a grouping followed by an aggregation.
Performs a grouping followed by an aggregation.
```
schemaRDD.groupBy('year)(Sum('sales) as 'totalSales)
```
def groupBy[K](f: (Row) ⇒ K, p: Partitioner)(implicit kt: ClassTag[K], ord: Ordering[K]): RDD[(K, Iterable[Row])]

Definition Classes
RDD
def groupBy[K](f: (Row) ⇒ K, numPartitions: Int)(implicit kt: ClassTag[K]): RDD[(K, Iterable[Row])]

Definition Classes
RDD
def groupBy[K](f: (Row) ⇒ K)(implicit kt: ClassTag[K]): RDD[(K, Iterable[Row])]

Definition Classes
RDD
def hashCode(): Int

Definition Classes
AnyRef → Any
val id: Int

Definition Classes
RDD
def insertInto(tableName: String): Unit

:: Experimental :: Appends the rows from this RDD to the specified table.
:: Experimental :: Appends the rows from this RDD to the specified table.

Definition Classes
SchemaRDDLike
Annotations
@Experimental()
def insertInto(tableName: String, overwrite: Boolean): Unit

:: Experimental :: Adds the rows from this RDD to the specified table, optionally overwriting the existing data.
:: Experimental :: Adds the rows from this RDD to the specified table, optionally overwriting the existing data.

Definition Classes
SchemaRDDLike
Annotations
@Experimental()
def intersect(otherPlan: SchemaRDD): SchemaRDD

Performs a relational intersect on two SchemaRDDs
Performs a relational intersect on two SchemaRDDs
otherPlan
the SchemaRDD that should be intersected with this one.
def intersection(other: RDD[Row], numPartitions: Int): SchemaRDD

Definition Classes
SchemaRDD → RDD
def intersection(other: RDD[Row], partitioner: Partitioner)(implicit ord: Ordering[Row] = null): SchemaRDD

Definition Classes
SchemaRDD → RDD
def intersection(other: RDD[Row]): SchemaRDD

Definition Classes
SchemaRDD → RDD
def isCheckpointed: Boolean

Definition Classes
RDD
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
def isTraceEnabled(): Boolean

Attributes
protected
Definition Classes
Logging
final def iterator(split: Partition, context: TaskContext): Iterator[Row]

Definition Classes
RDD
def join(otherPlan: SchemaRDD, joinType: JoinType = Inner, on: Option[Expression] = None): SchemaRDD

Performs a relational join on two SchemaRDDs
Performs a relational join on two SchemaRDDs
otherPlan
the SchemaRDD that should be joined with this one.
joinType
One of Inner, LeftOuter, RightOuter, or FullOuter. Defaults to Inner.
on
An optional condition for the join operation. This is equivalent to the ON clause in standard SQL. In the case of Inner joins, specifying a condition is equivalent to adding where clauses after the join.
def keyBy[K](f: (Row) ⇒ K): RDD[(K, Row)]

Definition Classes
RDD
def limit(limitNum: Int): SchemaRDD

Limits the results by the given integer.
Limits the results by the given integer.
```
schemaRDD.limit(10)
```
def log: Logger

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logError(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logError(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logInfo(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logInfo(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logName: String

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logWarning(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logWarning(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
val logicalPlan: LogicalPlan

Attributes
protected[org.apache.spark]
Definition Classes
SchemaRDDLike
def map[U](f: (Row) ⇒ U)(implicit arg0: ClassTag[U]): RDD[U]

Definition Classes
RDD
def mapPartitions[U](f: (Iterator[Row]) ⇒ Iterator[U], preservesPartitioning: Boolean)(implicit arg0: ClassTag[U]): RDD[U]

Definition Classes
RDD
def mapPartitionsWithIndex[U](f: (Int, Iterator[Row]) ⇒ Iterator[U], preservesPartitioning: Boolean)(implicit arg0: ClassTag[U]): RDD[U]

Definition Classes
RDD
def max()(implicit ord: Ordering[Row]): Row

Definition Classes
RDD
def min()(implicit ord: Ordering[Row]): Row

Definition Classes
RDD
var name: String

Definition Classes
RDD
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def orderBy(sortExprs: SortOrder*): SchemaRDD

Sorts the results by the given expressions.
Sorts the results by the given expressions.
```
schemaRDD.orderBy('a)
schemaRDD.orderBy('a, 'b)
schemaRDD.orderBy('a.asc, 'b.desc)
```
def parent[U](j: Int)(implicit arg0: ClassTag[U]): RDD[U]

Attributes
protected[org.apache.spark]
Definition Classes
RDD
val partitioner: Option[Partitioner]

Definition Classes
RDD
final def partitions: Array[Partition]

Definition Classes
RDD
def persist(newLevel: StorageLevel): SchemaRDD.this.type

Definition Classes
SchemaRDD → RDD
def persist(): SchemaRDD.this.type

Definition Classes
RDD
def pipe(command: Seq[String], env: Map[String, String], printPipeContext: ((String) ⇒ Unit) ⇒ Unit, printRDDElement: (Row, (String) ⇒ Unit) ⇒ Unit, separateWorkingDir: Boolean): RDD[String]

Definition Classes
RDD
def pipe(command: String, env: Map[String, String]): RDD[String]

Definition Classes
RDD
def pipe(command: String): RDD[String]

Definition Classes
RDD
final def preferredLocations(split: Partition): Seq[String]

Definition Classes
RDD
def printSchema(): Unit

Prints out the schema.
Prints out the schema.

Definition Classes
SchemaRDDLike
lazy val queryExecution: QueryExecution

:: DeveloperApi :: A lazily computed query execution workflow.
:: DeveloperApi :: A lazily computed query execution workflow. All other RDD operations are passed through to the RDD that is produced by this workflow. This workflow is produced lazily because invoking the whole query optimization pipeline can be expensive.
The query execution is considered a Developer API as phases may be added or removed in future releases. This execution is only exposed to provide an interface for inspecting the various phases for debugging purposes. Applications should not depend on particular phases existing or producing any specific output, even for exactly the same query.
Additionally, the RDD exposed by this execution is not designed for consumption by end users. In particular, it does not contain any schema information, and it reuses Row objects internally. This object reuse improves performance, but can make programming against the RDD more difficult. Instead, end users should perform RDD operations on a SchemaRDD directly.

Definition Classes
SchemaRDDLike
def randomSplit(weights: Array[Double], seed: Long): Array[RDD[Row]]

Definition Classes
RDD
def reduce(f: (Row, Row) ⇒ Row): Row

Definition Classes
RDD
def registerTempTable(tableName: String): Unit

Registers this RDD as a temporary table using the given name.
Registers this RDD as a temporary table using the given name. The lifetime of this temporary table is tied to the SQLContext that was used to create this SchemaRDD.

Definition Classes
SchemaRDDLike
def repartition(numPartitions: Int)(implicit ord: Ordering[Row] = null): SchemaRDD

Definition Classes
SchemaRDD → RDD
def sample(withReplacement: Boolean = true, fraction: Double, seed: Long): SchemaRDD

:: Experimental :: Returns a sampled version of the underlying dataset.
:: Experimental :: Returns a sampled version of the underlying dataset.

Definition Classes
SchemaRDD → RDD
Annotations
@Experimental()
def saveAsObjectFile(path: String): Unit

Definition Classes
RDD
def saveAsParquetFile(path: String): Unit

Saves the contents of this SchemaRDD as a parquet file, preserving the schema.
Saves the contents of this SchemaRDD as a parquet file, preserving the schema. Files that are written out using this method can be read back in as a SchemaRDD using the parquetFile function.

Definition Classes
SchemaRDDLike
def saveAsTable(tableName: String): Unit

:: Experimental :: Creates a table from the contents of this SchemaRDD.
:: Experimental :: Creates a table from the contents of this SchemaRDD. This will fail if the table already exists.
Note that this currently only works with SchemaRDDs that are created from a HiveContext as there is no notion of a persisted catalog in a standard SQL context. Instead you can write an RDD out to a parquet file, and then register that file as a table. This "table" can then be the target of an insertInto.

Definition Classes
SchemaRDDLike
Annotations
@Experimental()
def saveAsTextFile(path: String, codec: Class[_ <: CompressionCodec]): Unit

Definition Classes
RDD
def saveAsTextFile(path: String): Unit

Definition Classes
RDD
lazy val schema: StructType

Returns the schema of this SchemaRDD (represented by a StructType).
def schemaString: String

Returns the schema as a string in the tree format.
Returns the schema as a string in the tree format.

Definition Classes
SchemaRDDLike
def select(exprs: Expression*): SchemaRDD

Changes the output of this relation to the given expressions, similar to the SELECT clause in SQL.
Changes the output of this relation to the given expressions, similar to the SELECT clause in SQL.
```
schemaRDD.select('a, 'b + 'c, 'd as 'aliasedName)
```
exprs
a set of logical expressions that will be evaluated for each input row.
def setName(_name: String): SchemaRDD.this.type

Definition Classes
RDD
def sortBy[K](f: (Row) ⇒ K, ascending: Boolean, numPartitions: Int)(implicit ord: Ordering[K], ctag: ClassTag[K]): RDD[Row]

Definition Classes
RDD
def sparkContext: SparkContext

Definition Classes
RDD
val sqlContext: SQLContext

Definition Classes
SchemaRDD → SchemaRDDLike
def subtract(other: RDD[Row], p: Partitioner)(implicit ord: Ordering[Row] = null): SchemaRDD

Definition Classes
SchemaRDD → RDD
def subtract(other: RDD[Row], numPartitions: Int): SchemaRDD

Definition Classes
SchemaRDD → RDD
def subtract(other: RDD[Row]): SchemaRDD

Definition Classes
SchemaRDD → RDD
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def take(num: Int): Array[Row]

Definition Classes
SchemaRDD → RDD
def takeOrdered(num: Int)(implicit ord: Ordering[Row]): Array[Row]

Definition Classes
RDD
def takeSample(withReplacement: Boolean, num: Int, seed: Long): Array[Row]

Definition Classes
RDD
def toDebugString: String

Definition Classes
RDD
def toJSON: RDD[String]

Returns a new RDD with each row transformed to a JSON string.
def toJavaRDD(): JavaRDD[Row]

Definition Classes
RDD
def toJavaSchemaRDD: JavaSchemaRDD

Returns this RDD as a JavaSchemaRDD.
def toLocalIterator: Iterator[Row]

Definition Classes
RDD
def toSchemaRDD: SchemaRDD

Returns this RDD as a SchemaRDD.
Returns this RDD as a SchemaRDD. Intended primarily to force the invocation of the implicit conversion from a standard RDD to a SchemaRDD.
def toString(): String

Definition Classes
SchemaRDDLike → AnyRef → Any
def top(num: Int)(implicit ord: Ordering[Row]): Array[Row]

Definition Classes
RDD
def union(other: RDD[Row]): RDD[Row]

Definition Classes
RDD
def unionAll(otherPlan: SchemaRDD): SchemaRDD

Combines the tuples of two RDDs with the same schema, keeping duplicates.
def unpersist(blocking: Boolean): SchemaRDD.this.type

Definition Classes
SchemaRDD → RDD
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
def where(dynamicUdf: (DynamicRow) ⇒ Boolean): SchemaRDD

:: Experimental :: Filters tuples using a function over a Dynamic version of a given Row.
:: Experimental :: Filters tuples using a function over a Dynamic version of a given Row. DynamicRows use Scala's Dynamic trait to emulate an ORM in a dynamically typed language. Since the type of the column is not known at compile time, all attributes are converted to strings before being passed to the function.
```
schemaRDD.where(r => r.firstName == "Bob" && r.lastName == "Smith")
```
Annotations
@Experimental()
def where[T1](arg1: Symbol)(udf: (T1) ⇒ Boolean): SchemaRDD

Filters tuples using a function over the value of the specified column.
Filters tuples using a function over the value of the specified column.
```
schemaRDD.where('a)((a: Int) => ...)
```
def where(condition: Expression): SchemaRDD

Filters the output, only returning those rows where condition evaluates to true.
Filters the output, only returning those rows where condition evaluates to true.
```
schemaRDD.where('a === 'b)
schemaRDD.where('a === 1)
schemaRDD.where('a + 'b > 10)
```
def zip[U](other: RDD[U])(implicit arg0: ClassTag[U]): RDD[(Row, U)]

Definition Classes
RDD
def zipPartitions[B, C, D, V](rdd2: RDD[B], rdd3: RDD[C], rdd4: RDD[D])(f: (Iterator[Row], Iterator[B], Iterator[C], Iterator[D]) ⇒ Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[D], arg3: ClassTag[V]): RDD[V]

Definition Classes
RDD
def zipPartitions[B, C, D, V](rdd2: RDD[B], rdd3: RDD[C], rdd4: RDD[D], preservesPartitioning: Boolean)(f: (Iterator[Row], Iterator[B], Iterator[C], Iterator[D]) ⇒ Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[D], arg3: ClassTag[V]): RDD[V]

Definition Classes
RDD
def zipPartitions[B, C, V](rdd2: RDD[B], rdd3: RDD[C])(f: (Iterator[Row], Iterator[B], Iterator[C]) ⇒ Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[V]): RDD[V]

Definition Classes
RDD
def zipPartitions[B, C, V](rdd2: RDD[B], rdd3: RDD[C], preservesPartitioning: Boolean)(f: (Iterator[Row], Iterator[B], Iterator[C]) ⇒ Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[V]): RDD[V]

Definition Classes
RDD
def zipPartitions[B, V](rdd2: RDD[B])(f: (Iterator[Row], Iterator[B]) ⇒ Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[V]): RDD[V]

Definition Classes
RDD
def zipPartitions[B, V](rdd2: RDD[B], preservesPartitioning: Boolean)(f: (Iterator[Row], Iterator[B]) ⇒ Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[V]): RDD[V]

Definition Classes
RDD
def zipWithIndex(): RDD[(Row, Long)]

Definition Classes
RDD
def zipWithUniqueId(): RDD[(Row, Long)]

Definition Classes
RDD

Deprecated Value Members

def filterWith[A](constructA: (Int) ⇒ A)(p: (Row, A) ⇒ Boolean): RDD[Row]

Definition Classes
RDD
Annotations
@deprecated
Deprecated
(Since version 1.0.0) use mapPartitionsWithIndex and filter
def flatMapWith[A, U](constructA: (Int) ⇒ A, preservesPartitioning: Boolean)(f: (Row, A) ⇒ Seq[U])(implicit arg0: ClassTag[U]): RDD[U]

Definition Classes
RDD
Annotations
@deprecated
Deprecated
(Since version 1.0.0) use mapPartitionsWithIndex and flatMap
def foreachWith[A](constructA: (Int) ⇒ A)(f: (Row, A) ⇒ Unit): Unit

Definition Classes
RDD
Annotations
@deprecated
Deprecated
(Since version 1.0.0) use mapPartitionsWithIndex and foreach
def limit(limitExpr: Expression): SchemaRDD

Annotations
@deprecated
Deprecated
(Since version 1.1.0) use limit with integer argument
def mapPartitionsWithContext[U](f: (TaskContext, Iterator[Row]) ⇒ Iterator[U], preservesPartitioning: Boolean)(implicit arg0: ClassTag[U]): RDD[U]

Definition Classes
RDD
Annotations
@DeveloperApi() @deprecated
Deprecated
(Since version 1.2.0) use TaskContext.get
def mapPartitionsWithSplit[U](f: (Int, Iterator[Row]) ⇒ Iterator[U], preservesPartitioning: Boolean)(implicit arg0: ClassTag[U]): RDD[U]

Definition Classes
RDD
Annotations
@deprecated
Deprecated
(Since version 0.7.0) use mapPartitionsWithIndex
def mapWith[A, U](constructA: (Int) ⇒ A, preservesPartitioning: Boolean)(f: (Row, A) ⇒ U)(implicit arg0: ClassTag[U]): RDD[U]

Definition Classes
RDD
Annotations
@deprecated
Deprecated
(Since version 1.0.0) use mapPartitionsWithIndex
def registerAsTable(tableName: String): Unit

Definition Classes
SchemaRDDLike
Annotations
@deprecated
Deprecated
(Since version 1.1) Use registerTempTable instead of registerAsTable.
def toArray(): Array[Row]

Definition Classes
RDD
Annotations
@deprecated
Deprecated
(Since version 1.0.0) use collect

Inherited from SchemaRDDLike

Inherited from RDD[Row]

Inherited from Logging

Inherited from Serializable

Inherited from AnyRef

Inherited from Any

Language Integrated Queries

Functions that create new queries from SchemaRDDs. The result of all query functions is also a SchemaRDD, allowing multiple operations to be chained using a builder pattern.

class SchemaRDD extends RDD[Row] with SchemaRDDLike

SQL Queries

Language Integrated Queries

Instance Constructors

new SchemaRDD(sqlContext: SQLContext, baseLogicalPlan: LogicalPlan)

Value Members

final def !=(arg0: AnyRef): Boolean

final def !=(arg0: Any): Boolean

final def ##(): Int

def ++(other: RDD[Row]): RDD[Row]

final def ==(arg0: AnyRef): Boolean

final def ==(arg0: Any): Boolean

def aggregate(aggregateExprs: Expression*): SchemaRDD

def aggregate[U](zeroValue: U)(seqOp: (U, Row) ⇒ U, combOp: (U, U) ⇒ U)(implicit arg0: ClassTag[U]): U

def as(alias: Symbol): SchemaRDD

final def asInstanceOf[T0]: T0

val baseLogicalPlan: LogicalPlan

def baseSchemaRDD: SchemaRDD

def cache(): SchemaRDD.this.type

def cartesian[U](other: RDD[U])(implicit arg0: ClassTag[U]): RDD[(Row, U)]

def checkpoint(): Unit

def clearDependencies(): Unit

def clone(): AnyRef

def coalesce(numPartitions: Int, shuffle: Boolean = false)(implicit ord: Ordering[Row] = null): SchemaRDD

def collect(): Array[Row]

def collect[U](f: PartialFunction[Row, U])(implicit arg0: ClassTag[U]): RDD[U]

def compute(split: Partition, context: TaskContext): Iterator[Row]

def context: SparkContext

def count(): Long

def countApprox(timeout: Long, confidence: Double): PartialResult[BoundedDouble]

def countApproxDistinct(relativeSD: Double): Long

def countApproxDistinct(p: Int, sp: Int): Long

def countByValue()(implicit ord: Ordering[Row]): Map[Row, Long]

def countByValueApprox(timeout: Long, confidence: Double)(implicit ord: Ordering[Row]): PartialResult[Map[Row, BoundedDouble]]

final def dependencies: Seq[Dependency[_]]

def distinct(numPartitions: Int)(implicit ord: Ordering[Row] = null): SchemaRDD

def distinct(): SchemaRDD

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def except(otherPlan: SchemaRDD): SchemaRDD

def filter(f: (Row) ⇒ Boolean): SchemaRDD

def finalize(): Unit

def first(): Row

def firstParent[U](implicit arg0: ClassTag[U]): RDD[U]

def flatMap[U](f: (Row) ⇒ TraversableOnce[U])(implicit arg0: ClassTag[U]): RDD[U]

def fold(zeroValue: Row)(op: (Row, Row) ⇒ Row): Row

def foreach(f: (Row) ⇒ Unit): Unit

def foreachPartition(f: (Iterator[Row]) ⇒ Unit): Unit

def generate(generator: Generator, join: Boolean = false, outer: Boolean = false, alias: Option[String] = None): SchemaRDD

def getCheckpointFile: Option[String]

final def getClass(): Class[_]

def getDependencies: Seq[Dependency[_]]

def getPartitions: Array[Partition]

def getPreferredLocations(split: Partition): Seq[String]

def getStorageLevel: StorageLevel

def glom(): RDD[Array[Row]]

def groupBy(groupingExprs: Expression*)(aggregateExprs: Expression*): SchemaRDD

def groupBy[K](f: (Row) ⇒ K, p: Partitioner)(implicit kt: ClassTag[K], ord: Ordering[K]): RDD[(K, Iterable[Row])]

def groupBy[K](f: (Row) ⇒ K, numPartitions: Int)(implicit kt: ClassTag[K]): RDD[(K, Iterable[Row])]

def groupBy[K](f: (Row) ⇒ K)(implicit kt: ClassTag[K]): RDD[(K, Iterable[Row])]

def hashCode(): Int

val id: Int

def insertInto(tableName: String): Unit

def insertInto(tableName: String, overwrite: Boolean): Unit

def intersect(otherPlan: SchemaRDD): SchemaRDD

def intersection(other: RDD[Row], numPartitions: Int): SchemaRDD

def intersection(other: RDD[Row], partitioner: Partitioner)(implicit ord: Ordering[Row] = null): SchemaRDD

def intersection(other: RDD[Row]): SchemaRDD

def isCheckpointed: Boolean

final def isInstanceOf[T0]: Boolean

def isTraceEnabled(): Boolean

final def iterator(split: Partition, context: TaskContext): Iterator[Row]

def join(otherPlan: SchemaRDD, joinType: JoinType = Inner, on: Option[Expression] = None): SchemaRDD

def keyBy[K](f: (Row) ⇒ K): RDD[(K, Row)]

def limit(limitNum: Int): SchemaRDD

def log: Logger

def logDebug(msg: ⇒ String, throwable: Throwable): Unit

def logDebug(msg: ⇒ String): Unit

def logError(msg: ⇒ String, throwable: Throwable): Unit

def groupBy(groupingExprs: Expression*)(aggregateExprs: Expression*): SchemaRDD