the RDD being shuffled. Elements of this RDD are (partitionId, Row) pairs. Partition ids should be in the range [0, numPartitions - 1].
the serializer used during the shuffle.
the number of post-shuffle partitions.
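Taken together, the parameters above describe the shuffle input: an RDD of (partitionId, row) pairs, a serializer, and a post-shuffle partition count. A minimal sketch of the partition-id contract, using plain Scala collections with no Spark dependency (`Row` here is a hypothetical stand-in for Spark's internal row type):

```scala
object PartitionIdContract {
  // Hypothetical stand-in for Spark's internal row type.
  type Row = Seq[Any]

  // Every partition id attached to a row must fall in [0, numPartitions - 1].
  def validPartitioning(pairs: Seq[(Int, Row)], numPartitions: Int): Boolean =
    pairs.forall { case (pid, _) => 0 <= pid && pid < numPartitions }

  def main(args: Array[String]): Unit = {
    val pairs = Seq((0, Seq("a", 1)), (2, Seq("b", 2)), (1, Seq("c", 3)))
    println(validPartitioning(pairs, numPartitions = 3)) // true
    println(validPartitioning(pairs, numPartitions = 2)) // false: id 2 is out of range
  }
}
```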
(Since version 1.0.0) use mapPartitionsWithIndex and filter
(Since version 1.0.0) use mapPartitionsWithIndex and flatMap
(Since version 1.0.0) use mapPartitionsWithIndex and foreach
(Since version 1.2.0) use TaskContext.get
(Since version 0.7.0) use mapPartitionsWithIndex
(Since version 1.0.0) use mapPartitionsWithIndex
(Since version 1.0.0) use collect
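Each of the deprecated operators above can be rebuilt from `mapPartitionsWithIndex` plus an ordinary iterator transform. A hedged sketch of that pattern, simulated over plain Scala collections (a `Seq` of partitions standing in for an RDD) rather than a live SparkContext:

```scala
object ReplacementPattern {
  // Plain-collections stand-in for RDD.mapPartitionsWithIndex:
  // apply f to each partition's iterator together with its partition index.
  def mapPartitionsWithIndex[A, B](parts: Seq[Seq[A]])(
      f: (Int, Iterator[A]) => Iterator[B]): Seq[Seq[B]] =
    parts.zipWithIndex.map { case (p, i) => f(i, p.iterator).toList }

  def main(args: Array[String]): Unit = {
    val partitions = Seq(Seq(1, 2, 3), Seq(4, 5))

    // filterWith replacement: mapPartitionsWithIndex + filter on the iterator
    val evens = mapPartitionsWithIndex(partitions)((_, it) => it.filter(_ % 2 == 0))
    println(evens) // List(List(2), List(4))

    // mapWith replacement: mapPartitionsWithIndex + map, here tagging each
    // element with its partition index
    val tagged = mapPartitionsWithIndex(partitions)((i, it) => it.map(x => (i, x)))
    println(tagged) // List(List((0,1), (0,2), (0,3)), List((1,4), (1,5)))
  }
}
```

The same shape covers `foreachWith` (use `foreach` inside the partition function) and `flatMapWith` (use `flatMap`), which is why the deprecation notes all point at the one surviving operator.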
This is a specialized version of org.apache.spark.rdd.ShuffledRDD that is optimized for shuffling rows instead of Java key-value pairs. Note that something like this should eventually be implemented in Spark core, but that is blocked by some more general refactorings to shuffle interfaces / internals.
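The row-oriented design can be illustrated with a toy shuffle in plain Scala: each row already carries an explicit destination partition id, so routing rows needs no key extraction or key-value wrapping at shuffle time. This is only a sketch of the idea under that assumption, not Spark's implementation:

```scala
object ToyRowShuffle {
  // Hypothetical stand-in for Spark's internal row type.
  type Row = Seq[Any]

  // Route (partitionId, row) pairs into their post-shuffle partitions.
  def shuffle(pairs: Seq[(Int, Row)], numPartitions: Int): IndexedSeq[Seq[Row]] = {
    val buckets =
      IndexedSeq.fill(numPartitions)(scala.collection.mutable.Buffer.empty[Row])
    for ((pid, row) <- pairs) buckets(pid) += row
    buckets.map(_.toList)
  }

  def main(args: Array[String]): Unit = {
    val pairs = Seq((1, Seq("a")), (0, Seq("b")), (1, Seq("c")))
    println(shuffle(pairs, numPartitions = 2))
    // Vector(List(List(b)), List(List(a), List(c)))
  }
}
```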