Object

org.archive.archivespark.sparkling.util

RddUtil

Related Doc: package util

Permalink

object RddUtil

Linear Supertypes
AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. RddUtil
  2. AnyRef
  3. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Type Members

  1. case class AggregateRecordsPointer[D, A](value: A, records: RecordsPointer[D])(implicit evidence$2: ClassTag[D], evidence$3: ClassTag[A]) extends Product with Serializable

    Permalink
  2. case class RecordsPointer[D](rdd: RDD[D], partitionIdx: Int, offset: Int, length: Int)(implicit evidence$1: ClassTag[D]) extends Product with Serializable

    Permalink

Value Members

  1. final def !=(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  4. def accessPartitionRange[D](rdd: RDD[D], partitionIdx: Int, offset: Int, length: Int)(implicit arg0: ClassTag[D]): Array[D]

    Permalink
  5. def accessPartitionRange[D](rdd: RDD[D], pointer: RecordsPointer[D])(implicit arg0: ClassTag[D]): Array[D]

    Permalink
  6. final def asInstanceOf[T0]: T0

    Permalink
    Definition Classes
    Any
  7. def cache[A](rdd: RDD[A])(implicit arg0: ClassTag[A], arg1: TypedInOut[A]): RDD[A]

    Permalink
  8. def clone(): AnyRef

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.CloneNotSupportedException] )
  9. def collectDistinct[A](rdd: RDD[A], minus: Set[A] = Set.empty[A])(implicit arg0: ClassTag[A]): Set[A]

    Permalink
  10. def distinct[D](rdd: RDD[D], subtract: TraversableOnce[RDD[D]], partitioner: Partitioner)(implicit arg0: ClassTag[D]): RDD[D]

    Permalink
  11. def distinct[D](rdd: RDD[D], partitioner: Partitioner)(implicit arg0: ClassTag[D]): RDD[D]

    Permalink
  12. def distinct[D](rdd: RDD[D], subtract: TraversableOnce[RDD[D]] = Seq.empty, partitions: Int = parallelism)(implicit arg0: ClassTag[D]): RDD[D]

    Permalink
  13. def doPartitions[A](rdd: RDD[A])(action: (Int) ⇒ Unit)(implicit arg0: ClassTag[A]): RDD[A]

    Permalink
  14. final def eq(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  15. def equals(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  16. def finalize(): Unit

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  17. final def getClass(): Class[_]

    Permalink
    Definition Classes
    AnyRef → Any
  18. def groupSorted[A, B](rdd: RDD[A], groupBy: (A) ⇒ B): RDD[(B, Iterator[A])]

    Permalink
  19. def hashCode(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  20. final def isInstanceOf[T0]: Boolean

    Permalink
    Definition Classes
    Any
  21. def iterate[D](rdd: RDD[D], bufferSize: Int = 1000)(implicit arg0: ClassTag[D]): CleanupIterator[D]

    Permalink
  22. def iterateAggregates[D, A](rdd: RDD[D], bufferSize: Int = 1000)(aggregate: (Seq[D]) ⇒ A)(implicit arg0: ClassTag[D], arg1: ClassTag[A]): CleanupIterator[AggregateRecordsPointer[D, A]]

    Permalink
  23. def iterateDistinctPartitions[D](rdd: RDD[D], subtract: TraversableOnce[RDD[D]] = Seq.empty, partitions: Int = parallelism)(implicit arg0: ClassTag[D]): Iterator[Set[D]]

    Permalink
  24. def iteratePartitions[D](rdd: RDD[D])(implicit arg0: ClassTag[D]): Iterator[Seq[D]]

    Permalink
  25. def lazyFlatMap[A, B](rdd: RDD[A])(map: (A) ⇒ TraversableOnce[B])(implicit arg0: ClassTag[A], arg1: ClassTag[B]): RDD[B]

    Permalink
  26. def lazyMapPartitions[A, B](rdd: RDD[A])(map: (Int, Iterator[A]) ⇒ Iterator[B])(implicit arg0: ClassTag[A], arg1: ClassTag[B]): RDD[B]

    Permalink
  27. def loadBinary[A](path: String, decompress: Boolean = true, close: Boolean = true, readFully: Boolean = false, sorted: Boolean = false, strategy: LoadingStrategy = HdfsIO.defaultLoadingStrategy, repartitionFiles: Int = 0)(action: (String, InputStream) ⇒ TraversableOnce[A])(implicit arg0: ClassTag[A]): RDD[A]

    Permalink
  28. def loadBinaryLazy[A](path: String, decompress: Boolean = true, close: Boolean = true, readFully: Boolean = false, sorted: Boolean = false, strategy: LoadingStrategy = HdfsIO.defaultLoadingStrategy, repartitionFiles: Int = 0)(action: (String, ManagedVal[InputStream]) ⇒ TraversableOnce[A])(implicit arg0: ClassTag[A]): RDD[A]

    Permalink
  29. def loadFileGroups(path: String, group: (String) ⇒ String): RDD[(String, Seq[String])]

    Permalink
  30. def loadFilesLocality(path: String): RDD[String]

    Permalink
  31. def loadFilesSorted(path: String): RDD[String]

    Permalink
  32. def loadPartitions[A, P](path: String)(partition: (String) ⇒ Iterator[P])(load: (String, P) ⇒ Iterator[A])(implicit arg0: ClassTag[A], arg1: ClassTag[P], arg2: Ordering[P]): RDD[A]

    Permalink
  33. def loadPartitionsByBytes[A](path: String, bytesPerPartition: Long = 10.gb)(load: (CountingInputStream, Int, Boolean) ⇒ Iterator[A])(implicit arg0: ClassTag[A]): RDD[A]

    Permalink
  34. def loadTextFileGroups(path: String, group: (String) ⇒ String, readFully: Boolean = false, strategy: LoadingStrategy = HdfsIO.defaultLoadingStrategy): RDD[(String, Iterator[String])]

    Permalink
  35. def loadTextFiles(path: String, readFully: Boolean = false, sorted: Boolean = false, strategy: LoadingStrategy = HdfsIO.defaultLoadingStrategy, repartitionFiles: Int = 0): RDD[(String, Iterator[String])]

    Permalink
  36. def loadTextLines(path: String, readFully: Boolean = false, sorted: Boolean = false, strategy: LoadingStrategy = HdfsIO.defaultLoadingStrategy, repartitionFiles: Int = 0): RDD[String]

    Permalink
  37. def loadTextLinesGroupedByPrefix(path: String, prefix: (String) ⇒ String, readFully: Boolean = false, sorted: Boolean = false, strategy: LoadingStrategy = HdfsIO.defaultLoadingStrategy): RDD[(String, Iterator[String])]

    Permalink
  38. def loadTextLinesWithFilenames(path: String, readFully: Boolean = false, sorted: Boolean = false, strategy: LoadingStrategy = HdfsIO.defaultLoadingStrategy, repartitionFiles: Int = 0): RDD[(String, String)]

    Permalink
  39. def loadTextPartitionsByBytes(path: String, bytesPerPartition: Long = 10.gb): RDD[String]

    Permalink
  40. def loadTextPartitionsByLines(path: String, linesPerPartition: Int = 100000000): RDD[String]

    Permalink
  41. def loadTyped[A](path: String, readFully: Boolean = false, sorted: Boolean = false, strategy: LoadingStrategy = HdfsIO.defaultLoadingStrategy, repartitionFiles: Int = 0)(implicit arg0: ClassTag[A], arg1: TypedInOut[A]): RDD[A]

    Permalink
  42. final def ne(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  43. final def notify(): Unit

    Permalink
    Definition Classes
    AnyRef
  44. final def notifyAll(): Unit

    Permalink
    Definition Classes
    AnyRef
  45. def parallelize[T](items: Seq[T], partitions: Int)(implicit arg0: ClassTag[T]): RDD[T]

    Permalink
  46. def parallelize[T](items: Seq[T])(implicit arg0: ClassTag[T]): RDD[T]

    Permalink
  47. def parallelize(items: Int, partitions: Int): RDD[Int]

    Permalink
  48. def parallelize(items: Int): RDD[Int]

    Permalink
  49. def repartitionByAndSort[A, S, P](rdd: RDD[(S, A)], partitions: Int = parallelism, ascending: Boolean = true)(by: (S) ⇒ P)(implicit arg0: ClassTag[A], arg1: ClassTag[S], arg2: Ordering[S], arg3: ClassTag[P], ordering: Ordering[S]): RDD[(S, A)]

    Permalink
  50. def repartitionByPrimaryAndSort[A, P, S](rdd: RDD[((P, S), A)], partitions: Int = parallelism, ascending: Boolean = true)(implicit arg0: ClassTag[A], arg1: ClassTag[P], arg2: ClassTag[S], ordering: Ordering[(P, S)]): RDD[((P, S), A)]

    Permalink
  51. def saveAsNamedTextFile(rdd: ⇒ RDD[(String, String)], path: String, partitions: Int = Sparkling.parallelism, repartition: Boolean = false, sorted: Boolean = false, skipIfExists: Boolean = false): Long

    Permalink
  52. def saveAsTextFile(rdd: ⇒ RDD[String], path: String, skipIfExists: Boolean = false, checkPerFile: Boolean = false): Long

    Permalink
  53. def savePartitions[A](rdd: ⇒ RDD[A], path: String, skipIfExists: Boolean = false, checkPerFile: Boolean = false, skipEmpty: Boolean = true)(action: (Iterator[A], OutputStream) ⇒ Long): Long

    Permalink
  54. def saveSplits[A](rdd: ⇒ RDD[Iterator[A]], path: String, skipIfExists: Boolean = false)(action: (Iterator[A], OutputStream) ⇒ Long): Long

    Permalink
  55. def saveTextSplits(rdd: ⇒ RDD[String], path: String, max: Long, length: (String) ⇒ Long, skipIfExists: Boolean = false): Long

    Permalink
  56. def saveTextSplitsByBytes(rdd: ⇒ RDD[String], path: String, bytes: Long = 1.gb, skipIfExists: Boolean = false): Long

    Permalink
  57. def saveTextSplitsByLines(rdd: ⇒ RDD[String], path: String, lines: Int = 1000000, skipIfExists: Boolean = false): Long

    Permalink
  58. def saveTyped[A](rdd: ⇒ RDD[A], path: String, skipIfExists: Boolean = false, checkPerFile: Boolean = false)(implicit arg0: TypedInOut[A]): Long

    Permalink
  59. def shuffle[T](rdd: RDD[T], numPartitions: Int)(implicit arg0: ClassTag[T]): RDD[T]

    Permalink
  60. def sortByAndWithinPartitions[A, P, S](rdd: RDD[A], partitions: Int = parallelism, ascending: Boolean = true)(by: (A) ⇒ (P, S))(implicit arg0: ClassTag[A], arg1: ClassTag[P], arg2: Ordering[P], arg3: ClassTag[S], ordering: Ordering[(P, S)]): RDD[A]

    Permalink
  61. final def synchronized[T0](arg0: ⇒ T0): T0

    Permalink
    Definition Classes
    AnyRef
  62. def toString(): String

    Permalink
    Definition Classes
    AnyRef → Any
  63. final def wait(): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.InterruptedException] )
  64. final def wait(arg0: Long, arg1: Int): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.InterruptedException] )
  65. final def wait(arg0: Long): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.InterruptedException] )

Inherited from AnyRef

Inherited from Any

Ungrouped