Persist this RDD with the default storage level (MEMORY_ONLY).
Return a new RDD that is reduced into numPartitions partitions.
Return a new RDD containing the distinct elements in this RDD.
Return a new RDD containing only the elements that satisfy a predicate.
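For illustration, a minimal sketch of filter and distinct on a SchemaRDD, assuming Spark 1.x; the Person case class and all value names below are invented for the example:

  import org.apache.spark.{SparkConf, SparkContext}
  import org.apache.spark.sql.SQLContext

  case class Person(name: String, age: Int)

  val sc = new SparkContext(new SparkConf().setAppName("demo").setMaster("local"))
  val sqlContext = new SQLContext(sc)
  import sqlContext.createSchemaRDD  // implicitly converts an RDD of case classes to a SchemaRDD

  val people: org.apache.spark.sql.SchemaRDD =
    sc.parallelize(Seq(Person("Alice", 30), Person("Bob", 12), Person("Alice", 30)))

  // Keep only rows satisfying the predicate, then drop duplicate rows.
  val adults = people.filter(row => row.getInt(1) >= 18).distinct()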
:: Experimental :: Appends the rows from this RDD to the specified table.
:: Experimental :: Adds the rows from this RDD to the specified table, optionally overwriting the existing data.
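A hedged sketch of both overloads, assuming a Hive-backed table named events and a results SchemaRDD (both hypothetical):

  results.insertInto("events")                    // append rows to the table
  results.insertInto("events", overwrite = true)  // replace the existing data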
Return the intersection of this RDD and another one. The output will not contain any duplicate elements, even if the input RDDs did. Performs a hash partition across the cluster.
Note that this method performs a shuffle internally.
How many partitions to use in the resulting RDD
Return the intersection of this RDD and another one. The output will not contain any duplicate elements, even if the input RDDs did.
Note that this method performs a shuffle internally.
Partitioner to use for the resulting RDD
Return the intersection of this RDD and another one. The output will not contain any duplicate elements, even if the input RDDs did.
Note that this method performs a shuffle internally.
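A sketch of the three overloads, using the people SchemaRDD from the earlier example and a hypothetical otherPeople SchemaRDD with the same schema:

  import org.apache.spark.HashPartitioner

  val common1 = people.intersection(otherPeople)                          // default partitioning
  val common2 = people.intersection(otherPeople, numPartitions = 8)       // explicit partition count
  val common3 = people.intersection(otherPeople, new HashPartitioner(8))  // explicit partitioner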
Set this RDD's storage level to persist its values across operations after the first time it is computed. This can only be used to assign a new storage level if the RDD does not have a storage level set yet.
Persist this RDD with the default storage level (MEMORY_ONLY).
Prints out the schema in the tree format.
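For the hypothetical Person schema above, printSchema produces output along these lines:

  people.printSchema()
  // root
  //  |-- name: string (nullable = true)
  //  |-- age: integer (nullable = false)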
:: DeveloperApi :: A lazily computed query execution workflow. All other RDD operations are passed through to the RDD that is produced by this workflow. This workflow is produced lazily because invoking the whole query optimization pipeline can be expensive.
The query execution is considered a Developer API as phases may be added or removed in future releases. This execution is only exposed to provide an interface for inspecting the various phases for debugging purposes. Applications should not depend on particular phases existing or producing any specific output, even for exactly the same query.
Additionally, the RDD exposed by this execution is not designed for consumption by end users. In particular, it does not contain any schema information, and it reuses Row objects internally. This object reuse improves performance, but can make programming against the RDD more difficult. Instead end users should perform RDD operations on a SchemaRDD directly.
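A sketch of inspecting the workflow for debugging, continuing the earlier example; the phases printed are not a stable contract:

  println(people.queryExecution)               // logical, optimized, and physical plans
  println(people.queryExecution.executedPlan)  // the physical plan only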
Registers this RDD as a temporary table using the given name. The lifetime of this temporary table is tied to the SQLContext that was used to create this SchemaRDD.
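A minimal sketch, continuing the earlier example; the table name is arbitrary:

  people.registerTempTable("people")
  val teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")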
Return a new RDD that has exactly numPartitions partitions.
Can increase or decrease the level of parallelism in this RDD. Internally, this uses a shuffle to redistribute data.
If you are decreasing the number of partitions in this RDD, consider using coalesce, which can avoid performing a shuffle.
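A short sketch contrasting the two, using the people SchemaRDD from the earlier example:

  val wider = people.repartition(16)  // shuffles data into 16 partitions
  val fewer = people.coalesce(4)      // narrows to 4 partitions, avoiding a shuffle by default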
Saves the contents of this SchemaRDD as a parquet file, preserving the schema. Files that are written out using this method can be read back in as a SchemaRDD using the parquetFile function.
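A round-trip sketch, continuing the earlier example; the output path is hypothetical:

  people.saveAsParquetFile("/tmp/people.parquet")
  val restored = sqlContext.parquetFile("/tmp/people.parquet")  // same schema as people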
:: Experimental :: Creates a table from the contents of this SchemaRDD. This will fail if the table already exists.
Note that this currently only works with SchemaRDDs that are created from a HiveContext, as there is no notion of a persisted catalog in a standard SQL context. Instead you can write an RDD out to a parquet file, and then register that file as a table. This "table" can then be the target of an insertInto.
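A sketch of both routes; hiveResults, morePeople, the table names, and the path are all hypothetical:

  // With a SchemaRDD created from a HiveContext: persist directly into the catalog.
  hiveResults.saveAsTable("people_table")

  // With a standard SQLContext: the parquet workaround described above.
  people.saveAsParquetFile("/tmp/people.parquet")
  sqlContext.parquetFile("/tmp/people.parquet").registerTempTable("people_parquet")
  morePeople.insertInto("people_parquet")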
Returns the output schema in the tree format.
Assign a name to this RDD.
Return an RDD with the elements from this that are not in other.
Return an RDD with the elements from this that are not in other. Uses this partitioner/partition size, because even if other is huge, the resulting RDD will be <= us.
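A sketch, assuming a hypothetical banned SchemaRDD with the same schema as people:

  val allowed = people.subtract(banned)                     // keeps this RDD's partitioning
  val allowed8 = people.subtract(banned, numPartitions = 8) // explicit partition count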
Mark the RDD as non-persistent, and remove all blocks for it from memory and disk.
Whether to block until all blocks are deleted.
This RDD.
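A sketch pairing persist and unpersist on the earlier people SchemaRDD:

  people.cache()                     // default storage level, MEMORY_ONLY
  people.count()                     // materializes the cached blocks
  people.unpersist(blocking = true)  // frees memory and disk, waiting until blocks are deleted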
An RDD of Row objects that is returned as the result of a Spark SQL query. In addition to standard RDD operations, a JavaSchemaRDD can also be registered as a table in the JavaSQLContext that was used to create it. Registering a JavaSchemaRDD allows its contents to be queried in future SQL statements.