Packages

class ShuffledRowRDD extends RDD[InternalRow]

This is a specialized version of org.apache.spark.rdd.ShuffledRDD that is optimized for shuffling rows instead of Java key-value pairs. Note that something like this should eventually be implemented in Spark core, but that is blocked by some more general refactorings to shuffle interfaces / internals.

This RDD takes a ShuffleDependency (dependency) and an array of ShufflePartitionSpec (partitionSpecs) as input arguments.

The dependency holds the parent RDD of this RDD, which represents the dataset before the shuffle (i.e. the map output). Elements of the parent RDD are (partitionId, Row) pairs. Partition ids should be in the range [0, numPartitions - 1]. dependency.partitioner is the original partitioner used to partition the map output, and dependency.partitioner.numPartitions is the number of pre-shuffle partitions (i.e. the number of partitions of the map output).

Linear Supertypes
RDD[InternalRow], Logging, Serializable, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. ShuffledRowRDD
  2. RDD
  3. Logging
  4. Serializable
  5. AnyRef
  6. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. Protected

Instance Constructors

  1. new ShuffledRowRDD(dependency: ShuffleDependency[Int, InternalRow, InternalRow], metrics: Map[String, SQLMetric])
  2. new ShuffledRowRDD(dependency: ShuffleDependency[Int, InternalRow, InternalRow], metrics: Map[String, SQLMetric], partitionSpecs: Array[ShufflePartitionSpec])

Type Members

  1. implicit class LogStringContext extends AnyRef
    Definition Classes
    Logging

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##: Int
    Definition Classes
    AnyRef → Any
  3. def ++(other: RDD[InternalRow]): RDD[InternalRow]
    Definition Classes
    RDD
  4. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  5. def aggregate[U](zeroValue: U)(seqOp: (U, InternalRow) => U, combOp: (U, U) => U)(implicit arg0: ClassTag[U]): U
    Definition Classes
    RDD
  6. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  7. def barrier(): RDDBarrier[InternalRow]
    Definition Classes
    RDD
    Annotations
    @Experimental() @Since("2.4.0")
  8. def cache(): ShuffledRowRDD.this.type
    Definition Classes
    RDD
  9. def cartesian[U](other: RDD[U])(implicit arg0: ClassTag[U]): RDD[(InternalRow, U)]
    Definition Classes
    RDD
  10. def checkpoint(): Unit
    Definition Classes
    RDD
  11. def cleanShuffleDependencies(blocking: Boolean): Unit
    Definition Classes
    RDD
    Annotations
    @DeveloperApi() @Since("3.1.0")
  12. def clearDependencies(): Unit
    Definition Classes
    ShuffledRowRDD → RDD
  13. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.CloneNotSupportedException]) @IntrinsicCandidate() @native()
  14. def coalesce(numPartitions: Int, shuffle: Boolean, partitionCoalescer: Option[PartitionCoalescer])(implicit ord: Ordering[InternalRow]): RDD[InternalRow]
    Definition Classes
    RDD
  15. def collect[U](f: PartialFunction[InternalRow, U])(implicit arg0: ClassTag[U]): RDD[U]
    Definition Classes
    RDD
  16. def collect(): Array[InternalRow]
    Definition Classes
    RDD
  17. def compute(split: Partition, context: TaskContext): Iterator[InternalRow]
    Definition Classes
    ShuffledRowRDD → RDD
  18. def context: SparkContext
    Definition Classes
    RDD
  19. def count(): Long
    Definition Classes
    RDD
  20. def countApprox(timeout: Long, confidence: Double): PartialResult[BoundedDouble]
    Definition Classes
    RDD
  21. def countApproxDistinct(relativeSD: Double): Long
    Definition Classes
    RDD
  22. def countApproxDistinct(p: Int, sp: Int): Long
    Definition Classes
    RDD
  23. def countByValue()(implicit ord: Ordering[InternalRow]): Map[InternalRow, Long]
    Definition Classes
    RDD
  24. def countByValueApprox(timeout: Long, confidence: Double)(implicit ord: Ordering[InternalRow]): PartialResult[Map[InternalRow, BoundedDouble]]
    Definition Classes
    RDD
  25. final def dependencies: Seq[Dependency[_]]
    Definition Classes
    RDD
  26. var dependency: ShuffleDependency[Int, InternalRow, InternalRow]
  27. def distinct(): RDD[InternalRow]
    Definition Classes
    RDD
  28. def distinct(numPartitions: Int)(implicit ord: Ordering[InternalRow]): RDD[InternalRow]
    Definition Classes
    RDD
  29. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  30. def equals(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef → Any
  31. def filter(f: (InternalRow) => Boolean): RDD[InternalRow]
    Definition Classes
    RDD
  32. def first(): InternalRow
    Definition Classes
    RDD
  33. def firstParent[U](implicit arg0: ClassTag[U]): RDD[U]
    Attributes
    protected[spark]
    Definition Classes
    RDD
  34. def flatMap[U](f: (InternalRow) => IterableOnce[U])(implicit arg0: ClassTag[U]): RDD[U]
    Definition Classes
    RDD
  35. def fold(zeroValue: InternalRow)(op: (InternalRow, InternalRow) => InternalRow): InternalRow
    Definition Classes
    RDD
  36. def foreach(f: (InternalRow) => Unit): Unit
    Definition Classes
    RDD
  37. def foreachPartition(f: (Iterator[InternalRow]) => Unit): Unit
    Definition Classes
    RDD
  38. def getCheckpointFile: Option[String]
    Definition Classes
    RDD
  39. final def getClass(): Class[_ <: AnyRef]
    Definition Classes
    AnyRef → Any
    Annotations
    @IntrinsicCandidate() @native()
  40. def getDependencies: Seq[Dependency[_]]
    Definition Classes
    ShuffledRowRDD → RDD
  41. final def getNumPartitions: Int
    Definition Classes
    RDD
    Annotations
    @Since("1.6.0")
  42. def getOutputDeterministicLevel: rdd.DeterministicLevel.Value
    Attributes
    protected
    Definition Classes
    RDD
    Annotations
    @DeveloperApi()
  43. def getPartitions: Array[Partition]
    Definition Classes
    ShuffledRowRDD → RDD
  44. def getPreferredLocations(partition: Partition): Seq[String]
    Definition Classes
    ShuffledRowRDD → RDD
  45. def getResourceProfile(): ResourceProfile
    Definition Classes
    RDD
    Annotations
    @Experimental() @Since("3.1.0")
  46. def getStorageLevel: StorageLevel
    Definition Classes
    RDD
  47. def glom(): RDD[Array[InternalRow]]
    Definition Classes
    RDD
  48. def groupBy[K](f: (InternalRow) => K, p: Partitioner)(implicit kt: ClassTag[K], ord: Ordering[K]): RDD[(K, Iterable[InternalRow])]
    Definition Classes
    RDD
  49. def groupBy[K](f: (InternalRow) => K, numPartitions: Int)(implicit kt: ClassTag[K]): RDD[(K, Iterable[InternalRow])]
    Definition Classes
    RDD
  50. def groupBy[K](f: (InternalRow) => K)(implicit kt: ClassTag[K]): RDD[(K, Iterable[InternalRow])]
    Definition Classes
    RDD
  51. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @IntrinsicCandidate() @native()
  52. val id: Int
    Definition Classes
    RDD
  53. def initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
    Attributes
    protected
    Definition Classes
    Logging
  54. def initializeLogIfNecessary(isInterpreter: Boolean): Unit
    Attributes
    protected
    Definition Classes
    Logging
  55. def intersection(other: RDD[InternalRow], numPartitions: Int): RDD[InternalRow]
    Definition Classes
    RDD
  56. def intersection(other: RDD[InternalRow], partitioner: Partitioner)(implicit ord: Ordering[InternalRow]): RDD[InternalRow]
    Definition Classes
    RDD
  57. def intersection(other: RDD[InternalRow]): RDD[InternalRow]
    Definition Classes
    RDD
  58. lazy val isBarrier_: Boolean
    Attributes
    protected
    Definition Classes
    RDD
    Annotations
    @transient()
  59. def isCheckpointed: Boolean
    Definition Classes
    RDD
  60. def isEmpty(): Boolean
    Definition Classes
    RDD
  61. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  62. def isTraceEnabled(): Boolean
    Attributes
    protected
    Definition Classes
    Logging
  63. final def iterator(split: Partition, context: TaskContext): Iterator[InternalRow]
    Definition Classes
    RDD
  64. def keyBy[K](f: (InternalRow) => K): RDD[(K, InternalRow)]
    Definition Classes
    RDD
  65. def localCheckpoint(): ShuffledRowRDD.this.type
    Definition Classes
    RDD
  66. def log: Logger
    Attributes
    protected
    Definition Classes
    Logging
  67. def logDebug(msg: => String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  68. def logDebug(entry: LogEntry, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  69. def logDebug(entry: LogEntry): Unit
    Attributes
    protected
    Definition Classes
    Logging
  70. def logDebug(msg: => String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  71. def logError(msg: => String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  72. def logError(entry: LogEntry, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  73. def logError(entry: LogEntry): Unit
    Attributes
    protected
    Definition Classes
    Logging
  74. def logError(msg: => String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  75. def logInfo(msg: => String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  76. def logInfo(entry: LogEntry, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  77. def logInfo(entry: LogEntry): Unit
    Attributes
    protected
    Definition Classes
    Logging
  78. def logInfo(msg: => String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  79. def logName: String
    Attributes
    protected
    Definition Classes
    Logging
  80. def logTrace(msg: => String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  81. def logTrace(entry: LogEntry, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  82. def logTrace(entry: LogEntry): Unit
    Attributes
    protected
    Definition Classes
    Logging
  83. def logTrace(msg: => String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  84. def logWarning(msg: => String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  85. def logWarning(entry: LogEntry, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  86. def logWarning(entry: LogEntry): Unit
    Attributes
    protected
    Definition Classes
    Logging
  87. def logWarning(msg: => String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  88. def map[U](f: (InternalRow) => U)(implicit arg0: ClassTag[U]): RDD[U]
    Definition Classes
    RDD
  89. def mapPartitions[U](f: (Iterator[InternalRow]) => Iterator[U], preservesPartitioning: Boolean)(implicit arg0: ClassTag[U]): RDD[U]
    Definition Classes
    RDD
  90. def mapPartitionsWithEvaluator[U](evaluatorFactory: PartitionEvaluatorFactory[InternalRow, U])(implicit arg0: ClassTag[U]): RDD[U]
    Definition Classes
    RDD
    Annotations
    @DeveloperApi() @Since("3.5.0")
  91. def mapPartitionsWithIndex[U](f: (Int, Iterator[InternalRow]) => Iterator[U], preservesPartitioning: Boolean)(implicit arg0: ClassTag[U]): RDD[U]
    Definition Classes
    RDD
  92. def max()(implicit ord: Ordering[InternalRow]): InternalRow
    Definition Classes
    RDD
  93. def min()(implicit ord: Ordering[InternalRow]): InternalRow
    Definition Classes
    RDD
  94. var name: String
    Definition Classes
    RDD
  95. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  96. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @IntrinsicCandidate() @native()
  97. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @IntrinsicCandidate() @native()
  98. def parent[U](j: Int)(implicit arg0: ClassTag[U]): RDD[U]
    Attributes
    protected[spark]
    Definition Classes
    RDD
  99. val partitioner: Option[Partitioner]
    Definition Classes
    ShuffledRowRDD → RDD
  100. final def partitions: Array[Partition]
    Definition Classes
    RDD
  101. def persist(): ShuffledRowRDD.this.type
    Definition Classes
    RDD
  102. def persist(newLevel: StorageLevel): ShuffledRowRDD.this.type
    Definition Classes
    RDD
  103. def pipe(command: Seq[String], env: Map[String, String], printPipeContext: ((String) => Unit) => Unit, printRDDElement: (InternalRow, (String) => Unit) => Unit, separateWorkingDir: Boolean, bufferSize: Int, encoding: String): RDD[String]
    Definition Classes
    RDD
  104. def pipe(command: String, env: Map[String, String]): RDD[String]
    Definition Classes
    RDD
  105. def pipe(command: String): RDD[String]
    Definition Classes
    RDD
  106. final def preferredLocations(split: Partition): Seq[String]
    Definition Classes
    RDD
  107. def randomSplit(weights: Array[Double], seed: Long): Array[RDD[InternalRow]]
    Definition Classes
    RDD
  108. def reduce(f: (InternalRow, InternalRow) => InternalRow): InternalRow
    Definition Classes
    RDD
  109. def repartition(numPartitions: Int)(implicit ord: Ordering[InternalRow]): RDD[InternalRow]
    Definition Classes
    RDD
  110. def sample(withReplacement: Boolean, fraction: Double, seed: Long): RDD[InternalRow]
    Definition Classes
    RDD
  111. def saveAsObjectFile(path: String): Unit
    Definition Classes
    RDD
  112. def saveAsTextFile(path: String, codec: Class[_ <: CompressionCodec]): Unit
    Definition Classes
    RDD
  113. def saveAsTextFile(path: String): Unit
    Definition Classes
    RDD
  114. def setName(_name: String): ShuffledRowRDD.this.type
    Definition Classes
    RDD
  115. def sortBy[K](f: (InternalRow) => K, ascending: Boolean, numPartitions: Int)(implicit ord: Ordering[K], ctag: ClassTag[K]): RDD[InternalRow]
    Definition Classes
    RDD
  116. def sparkContext: SparkContext
    Definition Classes
    RDD
  117. def subtract(other: RDD[InternalRow], p: Partitioner)(implicit ord: Ordering[InternalRow]): RDD[InternalRow]
    Definition Classes
    RDD
  118. def subtract(other: RDD[InternalRow], numPartitions: Int): RDD[InternalRow]
    Definition Classes
    RDD
  119. def subtract(other: RDD[InternalRow]): RDD[InternalRow]
    Definition Classes
    RDD
  120. final def synchronized[T0](arg0: => T0): T0
    Definition Classes
    AnyRef
  121. def take(num: Int): Array[InternalRow]
    Definition Classes
    RDD
  122. def takeOrdered(num: Int)(implicit ord: Ordering[InternalRow]): Array[InternalRow]
    Definition Classes
    RDD
  123. def takeSample(withReplacement: Boolean, num: Int, seed: Long): Array[InternalRow]
    Definition Classes
    RDD
  124. def toDebugString: String
    Definition Classes
    RDD
  125. def toJavaRDD(): JavaRDD[InternalRow]
    Definition Classes
    RDD
  126. def toLocalIterator: Iterator[InternalRow]
    Definition Classes
    RDD
  127. def toString(): String
    Definition Classes
    RDD → AnyRef → Any
  128. def top(num: Int)(implicit ord: Ordering[InternalRow]): Array[InternalRow]
    Definition Classes
    RDD
  129. def treeAggregate[U](zeroValue: U, seqOp: (U, InternalRow) => U, combOp: (U, U) => U, depth: Int, finalAggregateOnExecutor: Boolean)(implicit arg0: ClassTag[U]): U
    Definition Classes
    RDD
  130. def treeAggregate[U](zeroValue: U)(seqOp: (U, InternalRow) => U, combOp: (U, U) => U, depth: Int)(implicit arg0: ClassTag[U]): U
    Definition Classes
    RDD
  131. def treeReduce(f: (InternalRow, InternalRow) => InternalRow, depth: Int): InternalRow
    Definition Classes
    RDD
  132. def union(other: RDD[InternalRow]): RDD[InternalRow]
    Definition Classes
    RDD
  133. def unpersist(blocking: Boolean): ShuffledRowRDD.this.type
    Definition Classes
    RDD
  134. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.InterruptedException])
  135. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.InterruptedException]) @native()
  136. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.InterruptedException])
  137. def withLogContext(context: HashMap[String, String])(body: => Unit): Unit
    Attributes
    protected
    Definition Classes
    Logging
  138. def withResources(rp: ResourceProfile): ShuffledRowRDD.this.type
    Definition Classes
    RDD
    Annotations
    @Experimental() @Since("3.1.0")
  139. def zip[U](other: RDD[U])(implicit arg0: ClassTag[U]): RDD[(InternalRow, U)]
    Definition Classes
    RDD
  140. def zipPartitions[B, C, D, V](rdd2: RDD[B], rdd3: RDD[C], rdd4: RDD[D])(f: (Iterator[InternalRow], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[D], arg3: ClassTag[V]): RDD[V]
    Definition Classes
    RDD
  141. def zipPartitions[B, C, D, V](rdd2: RDD[B], rdd3: RDD[C], rdd4: RDD[D], preservesPartitioning: Boolean)(f: (Iterator[InternalRow], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[D], arg3: ClassTag[V]): RDD[V]
    Definition Classes
    RDD
  142. def zipPartitions[B, C, V](rdd2: RDD[B], rdd3: RDD[C])(f: (Iterator[InternalRow], Iterator[B], Iterator[C]) => Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[V]): RDD[V]
    Definition Classes
    RDD
  143. def zipPartitions[B, C, V](rdd2: RDD[B], rdd3: RDD[C], preservesPartitioning: Boolean)(f: (Iterator[InternalRow], Iterator[B], Iterator[C]) => Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[V]): RDD[V]
    Definition Classes
    RDD
  144. def zipPartitions[B, V](rdd2: RDD[B])(f: (Iterator[InternalRow], Iterator[B]) => Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[V]): RDD[V]
    Definition Classes
    RDD
  145. def zipPartitions[B, V](rdd2: RDD[B], preservesPartitioning: Boolean)(f: (Iterator[InternalRow], Iterator[B]) => Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[V]): RDD[V]
    Definition Classes
    RDD
  146. def zipPartitionsWithEvaluator[U](rdd2: RDD[InternalRow], evaluatorFactory: PartitionEvaluatorFactory[InternalRow, U])(implicit arg0: ClassTag[U]): RDD[U]
    Definition Classes
    RDD
    Annotations
    @DeveloperApi() @Since("3.5.0")
  147. def zipWithIndex(): RDD[(InternalRow, Long)]
    Definition Classes
    RDD
  148. def zipWithUniqueId(): RDD[(InternalRow, Long)]
    Definition Classes
    RDD

Deprecated Value Members

  1. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.Throwable]) @Deprecated
    Deprecated

    (Since version 9)

Inherited from RDD[InternalRow]

Inherited from Logging

Inherited from Serializable

Inherited from AnyRef

Inherited from Any

Ungrouped