class ShuffledRowRDD extends RDD[InternalRow]
This is a specialized version of org.apache.spark.rdd.ShuffledRDD that is optimized for shuffling rows instead of Java key-value pairs. Note that something like this should eventually be implemented in Spark core, but that is blocked by some more general refactorings to shuffle interfaces / internals.
This RDD takes a ShuffleDependency (`dependency`) and an array of ShufflePartitionSpec as input arguments.
The `dependency` holds the parent RDD of this RDD, which represents the dataset before the shuffle (i.e. the map output). Elements of that parent RDD are (partitionId, Row) pairs.
Partition ids should be in the range [0, numPartitions - 1].
`dependency.partitioner` is the original partitioner used to partition the map output, and `dependency.partitioner.numPartitions` is the number of pre-shuffle partitions (i.e. the number of partitions of the map output).
- Alphabetic
- By Inheritance
- ShuffledRowRDD
- RDD
- Logging
- Serializable
- AnyRef
- Any
- Hide All
- Show All
- Public
- Protected
Instance Constructors
- new ShuffledRowRDD(dependency: ShuffleDependency[Int, InternalRow, InternalRow], metrics: Map[String, SQLMetric])
- new ShuffledRowRDD(dependency: ShuffleDependency[Int, InternalRow, InternalRow], metrics: Map[String, SQLMetric], partitionSpecs: Array[ShufflePartitionSpec])
Value Members
- final def !=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- final def ##: Int
- Definition Classes
- AnyRef → Any
- def ++(other: RDD[InternalRow]): RDD[InternalRow]
- Definition Classes
- RDD
- final def ==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- def aggregate[U](zeroValue: U)(seqOp: (U, InternalRow) => U, combOp: (U, U) => U)(implicit arg0: ClassTag[U]): U
- Definition Classes
- RDD
- final def asInstanceOf[T0]: T0
- Definition Classes
- Any
- def barrier(): RDDBarrier[InternalRow]
- Definition Classes
- RDD
- Annotations
- @Experimental() @Since("2.4.0")
- def cache(): ShuffledRowRDD.this.type
- Definition Classes
- RDD
- def cartesian[U](other: RDD[U])(implicit arg0: ClassTag[U]): RDD[(InternalRow, U)]
- Definition Classes
- RDD
- def checkpoint(): Unit
- Definition Classes
- RDD
- def cleanShuffleDependencies(blocking: Boolean): Unit
- Definition Classes
- RDD
- Annotations
- @DeveloperApi() @Since("3.1.0")
- def clearDependencies(): Unit
- Definition Classes
- ShuffledRowRDD → RDD
- def clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.CloneNotSupportedException]) @native()
- def coalesce(numPartitions: Int, shuffle: Boolean, partitionCoalescer: Option[PartitionCoalescer])(implicit ord: Ordering[InternalRow]): RDD[InternalRow]
- Definition Classes
- RDD
- def collect[U](f: PartialFunction[InternalRow, U])(implicit arg0: ClassTag[U]): RDD[U]
- Definition Classes
- RDD
- def collect(): Array[InternalRow]
- Definition Classes
- RDD
- def compute(split: Partition, context: TaskContext): Iterator[InternalRow]
- Definition Classes
- ShuffledRowRDD → RDD
- def context: SparkContext
- Definition Classes
- RDD
- def count(): Long
- Definition Classes
- RDD
- def countApprox(timeout: Long, confidence: Double): PartialResult[BoundedDouble]
- Definition Classes
- RDD
- def countApproxDistinct(relativeSD: Double): Long
- Definition Classes
- RDD
- def countApproxDistinct(p: Int, sp: Int): Long
- Definition Classes
- RDD
- def countByValue()(implicit ord: Ordering[InternalRow]): Map[InternalRow, Long]
- Definition Classes
- RDD
- def countByValueApprox(timeout: Long, confidence: Double)(implicit ord: Ordering[InternalRow]): PartialResult[Map[InternalRow, BoundedDouble]]
- Definition Classes
- RDD
- final def dependencies: Seq[Dependency[_]]
- Definition Classes
- RDD
- var dependency: ShuffleDependency[Int, InternalRow, InternalRow]
- def distinct(): RDD[InternalRow]
- Definition Classes
- RDD
- def distinct(numPartitions: Int)(implicit ord: Ordering[InternalRow]): RDD[InternalRow]
- Definition Classes
- RDD
- final def eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- def equals(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef → Any
- def filter(f: (InternalRow) => Boolean): RDD[InternalRow]
- Definition Classes
- RDD
- def finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.Throwable])
- def first(): InternalRow
- Definition Classes
- RDD
- def firstParent[U](implicit arg0: ClassTag[U]): RDD[U]
- Attributes
- protected[spark]
- Definition Classes
- RDD
- def flatMap[U](f: (InternalRow) => TraversableOnce[U])(implicit arg0: ClassTag[U]): RDD[U]
- Definition Classes
- RDD
- def fold(zeroValue: InternalRow)(op: (InternalRow, InternalRow) => InternalRow): InternalRow
- Definition Classes
- RDD
- def foreach(f: (InternalRow) => Unit): Unit
- Definition Classes
- RDD
- def foreachPartition(f: (Iterator[InternalRow]) => Unit): Unit
- Definition Classes
- RDD
- def getCheckpointFile: Option[String]
- Definition Classes
- RDD
- final def getClass(): Class[_ <: AnyRef]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- def getDependencies: Seq[Dependency[_]]
- Definition Classes
- ShuffledRowRDD → RDD
- final def getNumPartitions: Int
- Definition Classes
- RDD
- Annotations
- @Since("1.6.0")
- def getOutputDeterministicLevel: rdd.DeterministicLevel.Value
- Attributes
- protected
- Definition Classes
- RDD
- Annotations
- @DeveloperApi()
- def getPartitions: Array[Partition]
- Definition Classes
- ShuffledRowRDD → RDD
- def getPreferredLocations(partition: Partition): Seq[String]
- Definition Classes
- ShuffledRowRDD → RDD
- def getResourceProfile(): ResourceProfile
- Definition Classes
- RDD
- Annotations
- @Experimental() @Since("3.1.0")
- def getStorageLevel: StorageLevel
- Definition Classes
- RDD
- def glom(): RDD[Array[InternalRow]]
- Definition Classes
- RDD
- def groupBy[K](f: (InternalRow) => K, p: Partitioner)(implicit kt: ClassTag[K], ord: Ordering[K]): RDD[(K, Iterable[InternalRow])]
- Definition Classes
- RDD
- def groupBy[K](f: (InternalRow) => K, numPartitions: Int)(implicit kt: ClassTag[K]): RDD[(K, Iterable[InternalRow])]
- Definition Classes
- RDD
- def groupBy[K](f: (InternalRow) => K)(implicit kt: ClassTag[K]): RDD[(K, Iterable[InternalRow])]
- Definition Classes
- RDD
- def hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- val id: Int
- Definition Classes
- RDD
- def initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
- Attributes
- protected
- Definition Classes
- Logging
- def initializeLogIfNecessary(isInterpreter: Boolean): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def intersection(other: RDD[InternalRow], numPartitions: Int): RDD[InternalRow]
- Definition Classes
- RDD
- def intersection(other: RDD[InternalRow], partitioner: Partitioner)(implicit ord: Ordering[InternalRow]): RDD[InternalRow]
- Definition Classes
- RDD
- def intersection(other: RDD[InternalRow]): RDD[InternalRow]
- Definition Classes
- RDD
- lazy val isBarrier_: Boolean
- Attributes
- protected
- Definition Classes
- RDD
- Annotations
- @transient()
- def isCheckpointed: Boolean
- Definition Classes
- RDD
- def isEmpty(): Boolean
- Definition Classes
- RDD
- final def isInstanceOf[T0]: Boolean
- Definition Classes
- Any
- def isTraceEnabled(): Boolean
- Attributes
- protected
- Definition Classes
- Logging
- final def iterator(split: Partition, context: TaskContext): Iterator[InternalRow]
- Definition Classes
- RDD
- def keyBy[K](f: (InternalRow) => K): RDD[(K, InternalRow)]
- Definition Classes
- RDD
- def localCheckpoint(): ShuffledRowRDD.this.type
- Definition Classes
- RDD
- def log: Logger
- Attributes
- protected
- Definition Classes
- Logging
- def logDebug(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logDebug(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logError(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logError(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logInfo(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logInfo(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logName: String
- Attributes
- protected
- Definition Classes
- Logging
- def logTrace(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logTrace(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logWarning(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logWarning(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def map[U](f: (InternalRow) => U)(implicit arg0: ClassTag[U]): RDD[U]
- Definition Classes
- RDD
- def mapPartitions[U](f: (Iterator[InternalRow]) => Iterator[U], preservesPartitioning: Boolean)(implicit arg0: ClassTag[U]): RDD[U]
- Definition Classes
- RDD
- def mapPartitionsWithEvaluator[U](evaluatorFactory: PartitionEvaluatorFactory[InternalRow, U])(implicit arg0: ClassTag[U]): RDD[U]
- Definition Classes
- RDD
- Annotations
- @DeveloperApi() @Since("3.5.0")
- def mapPartitionsWithIndex[U](f: (Int, Iterator[InternalRow]) => Iterator[U], preservesPartitioning: Boolean)(implicit arg0: ClassTag[U]): RDD[U]
- Definition Classes
- RDD
- def max()(implicit ord: Ordering[InternalRow]): InternalRow
- Definition Classes
- RDD
- def min()(implicit ord: Ordering[InternalRow]): InternalRow
- Definition Classes
- RDD
- var name: String
- Definition Classes
- RDD
- final def ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- final def notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
- final def notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
- def parent[U](j: Int)(implicit arg0: ClassTag[U]): RDD[U]
- Attributes
- protected[spark]
- Definition Classes
- RDD
- val partitioner: Option[Partitioner]
- Definition Classes
- ShuffledRowRDD → RDD
- final def partitions: Array[Partition]
- Definition Classes
- RDD
- def persist(): ShuffledRowRDD.this.type
- Definition Classes
- RDD
- def persist(newLevel: StorageLevel): ShuffledRowRDD.this.type
- Definition Classes
- RDD
- def pipe(command: Seq[String], env: Map[String, String], printPipeContext: ((String) => Unit) => Unit, printRDDElement: (InternalRow, (String) => Unit) => Unit, separateWorkingDir: Boolean, bufferSize: Int, encoding: String): RDD[String]
- Definition Classes
- RDD
- def pipe(command: String, env: Map[String, String]): RDD[String]
- Definition Classes
- RDD
- def pipe(command: String): RDD[String]
- Definition Classes
- RDD
- final def preferredLocations(split: Partition): Seq[String]
- Definition Classes
- RDD
- def randomSplit(weights: Array[Double], seed: Long): Array[RDD[InternalRow]]
- Definition Classes
- RDD
- def reduce(f: (InternalRow, InternalRow) => InternalRow): InternalRow
- Definition Classes
- RDD
- def repartition(numPartitions: Int)(implicit ord: Ordering[InternalRow]): RDD[InternalRow]
- Definition Classes
- RDD
- def sample(withReplacement: Boolean, fraction: Double, seed: Long): RDD[InternalRow]
- Definition Classes
- RDD
- def saveAsObjectFile(path: String): Unit
- Definition Classes
- RDD
- def saveAsTextFile(path: String, codec: Class[_ <: CompressionCodec]): Unit
- Definition Classes
- RDD
- def saveAsTextFile(path: String): Unit
- Definition Classes
- RDD
- def setName(_name: String): ShuffledRowRDD.this.type
- Definition Classes
- RDD
- def sortBy[K](f: (InternalRow) => K, ascending: Boolean, numPartitions: Int)(implicit ord: Ordering[K], ctag: ClassTag[K]): RDD[InternalRow]
- Definition Classes
- RDD
- def sparkContext: SparkContext
- Definition Classes
- RDD
- def subtract(other: RDD[InternalRow], p: Partitioner)(implicit ord: Ordering[InternalRow]): RDD[InternalRow]
- Definition Classes
- RDD
- def subtract(other: RDD[InternalRow], numPartitions: Int): RDD[InternalRow]
- Definition Classes
- RDD
- def subtract(other: RDD[InternalRow]): RDD[InternalRow]
- Definition Classes
- RDD
- final def synchronized[T0](arg0: => T0): T0
- Definition Classes
- AnyRef
- def take(num: Int): Array[InternalRow]
- Definition Classes
- RDD
- def takeOrdered(num: Int)(implicit ord: Ordering[InternalRow]): Array[InternalRow]
- Definition Classes
- RDD
- def takeSample(withReplacement: Boolean, num: Int, seed: Long): Array[InternalRow]
- Definition Classes
- RDD
- def toDebugString: String
- Definition Classes
- RDD
- def toJavaRDD(): JavaRDD[InternalRow]
- Definition Classes
- RDD
- def toLocalIterator: Iterator[InternalRow]
- Definition Classes
- RDD
- def toString(): String
- Definition Classes
- RDD → AnyRef → Any
- def top(num: Int)(implicit ord: Ordering[InternalRow]): Array[InternalRow]
- Definition Classes
- RDD
- def treeAggregate[U](zeroValue: U, seqOp: (U, InternalRow) => U, combOp: (U, U) => U, depth: Int, finalAggregateOnExecutor: Boolean)(implicit arg0: ClassTag[U]): U
- Definition Classes
- RDD
- def treeAggregate[U](zeroValue: U)(seqOp: (U, InternalRow) => U, combOp: (U, U) => U, depth: Int)(implicit arg0: ClassTag[U]): U
- Definition Classes
- RDD
- def treeReduce(f: (InternalRow, InternalRow) => InternalRow, depth: Int): InternalRow
- Definition Classes
- RDD
- def union(other: RDD[InternalRow]): RDD[InternalRow]
- Definition Classes
- RDD
- def unpersist(blocking: Boolean): ShuffledRowRDD.this.type
- Definition Classes
- RDD
- final def wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException])
- final def wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException])
- final def wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException]) @native()
- def withResources(rp: ResourceProfile): ShuffledRowRDD.this.type
- Definition Classes
- RDD
- Annotations
- @Experimental() @Since("3.1.0")
- def zip[U](other: RDD[U])(implicit arg0: ClassTag[U]): RDD[(InternalRow, U)]
- Definition Classes
- RDD
- def zipPartitions[B, C, D, V](rdd2: RDD[B], rdd3: RDD[C], rdd4: RDD[D])(f: (Iterator[InternalRow], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[D], arg3: ClassTag[V]): RDD[V]
- Definition Classes
- RDD
- def zipPartitions[B, C, D, V](rdd2: RDD[B], rdd3: RDD[C], rdd4: RDD[D], preservesPartitioning: Boolean)(f: (Iterator[InternalRow], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[D], arg3: ClassTag[V]): RDD[V]
- Definition Classes
- RDD
- def zipPartitions[B, C, V](rdd2: RDD[B], rdd3: RDD[C])(f: (Iterator[InternalRow], Iterator[B], Iterator[C]) => Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[V]): RDD[V]
- Definition Classes
- RDD
- def zipPartitions[B, C, V](rdd2: RDD[B], rdd3: RDD[C], preservesPartitioning: Boolean)(f: (Iterator[InternalRow], Iterator[B], Iterator[C]) => Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[V]): RDD[V]
- Definition Classes
- RDD
- def zipPartitions[B, V](rdd2: RDD[B])(f: (Iterator[InternalRow], Iterator[B]) => Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[V]): RDD[V]
- Definition Classes
- RDD
- def zipPartitions[B, V](rdd2: RDD[B], preservesPartitioning: Boolean)(f: (Iterator[InternalRow], Iterator[B]) => Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[V]): RDD[V]
- Definition Classes
- RDD
- def zipPartitionsWithEvaluator[U](rdd2: RDD[InternalRow], evaluatorFactory: PartitionEvaluatorFactory[InternalRow, U])(implicit arg0: ClassTag[U]): RDD[U]
- Definition Classes
- RDD
- Annotations
- @DeveloperApi() @Since("3.5.0")
- def zipWithIndex(): RDD[(InternalRow, Long)]
- Definition Classes
- RDD
- def zipWithUniqueId(): RDD[(InternalRow, Long)]
- Definition Classes
- RDD