class FileScanRDD extends RDD[InternalRow]
An RDD that scans a list of file partitions.
Linear Supertypes: RDD[InternalRow], Logging, Serializable, AnyRef, Any
Ordering
- Alphabetic
- By Inheritance
Inherited
- FileScanRDD
- RDD
- Logging
- Serializable
- AnyRef
- Any
- Hide All
- Show All
Visibility
- Public
- Protected
Instance Constructors
- new FileScanRDD(sparkSession: SparkSession, readFunction: (PartitionedFile) => Iterator[InternalRow], filePartitions: Seq[FilePartition], readDataSchema: StructType, metadataColumns: Seq[AttributeReference] = Seq.empty)
Value Members
- final def !=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- final def ##: Int
- Definition Classes
- AnyRef → Any
- def ++(other: RDD[InternalRow]): RDD[InternalRow]
- Definition Classes
- RDD
- final def ==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- def aggregate[U](zeroValue: U)(seqOp: (U, InternalRow) => U, combOp: (U, U) => U)(implicit arg0: ClassTag[U]): U
- Definition Classes
- RDD
- final def asInstanceOf[T0]: T0
- Definition Classes
- Any
- def barrier(): RDDBarrier[InternalRow]
- Definition Classes
- RDD
- Annotations
- @Experimental() @Since("2.4.0")
- def cache(): FileScanRDD.this.type
- Definition Classes
- RDD
- def cartesian[U](other: RDD[U])(implicit arg0: ClassTag[U]): RDD[(InternalRow, U)]
- Definition Classes
- RDD
- def checkpoint(): Unit
- Definition Classes
- RDD
- def cleanShuffleDependencies(blocking: Boolean): Unit
- Definition Classes
- RDD
- Annotations
- @DeveloperApi() @Since("3.1.0")
- def clearDependencies(): Unit
- Attributes
- protected
- Definition Classes
- RDD
- def clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.CloneNotSupportedException]) @native()
- def coalesce(numPartitions: Int, shuffle: Boolean, partitionCoalescer: Option[PartitionCoalescer])(implicit ord: Ordering[InternalRow]): RDD[InternalRow]
- Definition Classes
- RDD
- def collect[U](f: PartialFunction[InternalRow, U])(implicit arg0: ClassTag[U]): RDD[U]
- Definition Classes
- RDD
- def collect(): Array[InternalRow]
- Definition Classes
- RDD
- def compute(split: Partition, context: TaskContext): Iterator[InternalRow]
- Definition Classes
- FileScanRDD → RDD
- def context: SparkContext
- Definition Classes
- RDD
- def count(): Long
- Definition Classes
- RDD
- def countApprox(timeout: Long, confidence: Double): PartialResult[BoundedDouble]
- Definition Classes
- RDD
- def countApproxDistinct(relativeSD: Double): Long
- Definition Classes
- RDD
- def countApproxDistinct(p: Int, sp: Int): Long
- Definition Classes
- RDD
- def countByValue()(implicit ord: Ordering[InternalRow]): Map[InternalRow, Long]
- Definition Classes
- RDD
- def countByValueApprox(timeout: Long, confidence: Double)(implicit ord: Ordering[InternalRow]): PartialResult[Map[InternalRow, BoundedDouble]]
- Definition Classes
- RDD
- final def dependencies: Seq[Dependency[_]]
- Definition Classes
- RDD
- def distinct(): RDD[InternalRow]
- Definition Classes
- RDD
- def distinct(numPartitions: Int)(implicit ord: Ordering[InternalRow]): RDD[InternalRow]
- Definition Classes
- RDD
- final def eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- def equals(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef → Any
- val filePartitions: Seq[FilePartition]
- def filter(f: (InternalRow) => Boolean): RDD[InternalRow]
- Definition Classes
- RDD
- def finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.Throwable])
- def first(): InternalRow
- Definition Classes
- RDD
- def firstParent[U](implicit arg0: ClassTag[U]): RDD[U]
- Attributes
- protected[spark]
- Definition Classes
- RDD
- def flatMap[U](f: (InternalRow) => TraversableOnce[U])(implicit arg0: ClassTag[U]): RDD[U]
- Definition Classes
- RDD
- def fold(zeroValue: InternalRow)(op: (InternalRow, InternalRow) => InternalRow): InternalRow
- Definition Classes
- RDD
- def foreach(f: (InternalRow) => Unit): Unit
- Definition Classes
- RDD
- def foreachPartition(f: (Iterator[InternalRow]) => Unit): Unit
- Definition Classes
- RDD
- def getCheckpointFile: Option[String]
- Definition Classes
- RDD
- final def getClass(): Class[_ <: AnyRef]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- def getDependencies: Seq[Dependency[_]]
- Attributes
- protected
- Definition Classes
- RDD
- final def getNumPartitions: Int
- Definition Classes
- RDD
- Annotations
- @Since("1.6.0")
- def getOutputDeterministicLevel: rdd.DeterministicLevel.Value
- Attributes
- protected
- Definition Classes
- RDD
- Annotations
- @DeveloperApi()
- def getPartitions: Array[Partition]
- Attributes
- protected
- Definition Classes
- FileScanRDD → RDD
- def getPreferredLocations(split: Partition): Seq[String]
- Attributes
- protected
- Definition Classes
- FileScanRDD → RDD
- def getResourceProfile(): ResourceProfile
- Definition Classes
- RDD
- Annotations
- @Experimental() @Since("3.1.0")
- def getStorageLevel: StorageLevel
- Definition Classes
- RDD
- def glom(): RDD[Array[InternalRow]]
- Definition Classes
- RDD
- def groupBy[K](f: (InternalRow) => K, p: Partitioner)(implicit kt: ClassTag[K], ord: Ordering[K]): RDD[(K, Iterable[InternalRow])]
- Definition Classes
- RDD
- def groupBy[K](f: (InternalRow) => K, numPartitions: Int)(implicit kt: ClassTag[K]): RDD[(K, Iterable[InternalRow])]
- Definition Classes
- RDD
- def groupBy[K](f: (InternalRow) => K)(implicit kt: ClassTag[K]): RDD[(K, Iterable[InternalRow])]
- Definition Classes
- RDD
- def hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- val id: Int
- Definition Classes
- RDD
- def initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
- Attributes
- protected
- Definition Classes
- Logging
- def initializeLogIfNecessary(isInterpreter: Boolean): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def intersection(other: RDD[InternalRow], numPartitions: Int): RDD[InternalRow]
- Definition Classes
- RDD
- def intersection(other: RDD[InternalRow], partitioner: Partitioner)(implicit ord: Ordering[InternalRow]): RDD[InternalRow]
- Definition Classes
- RDD
- def intersection(other: RDD[InternalRow]): RDD[InternalRow]
- Definition Classes
- RDD
- lazy val isBarrier_: Boolean
- Attributes
- protected
- Definition Classes
- RDD
- Annotations
- @transient()
- def isCheckpointed: Boolean
- Definition Classes
- RDD
- def isEmpty(): Boolean
- Definition Classes
- RDD
- final def isInstanceOf[T0]: Boolean
- Definition Classes
- Any
- def isTraceEnabled(): Boolean
- Attributes
- protected
- Definition Classes
- Logging
- final def iterator(split: Partition, context: TaskContext): Iterator[InternalRow]
- Definition Classes
- RDD
- def keyBy[K](f: (InternalRow) => K): RDD[(K, InternalRow)]
- Definition Classes
- RDD
- def localCheckpoint(): FileScanRDD.this.type
- Definition Classes
- RDD
- def log: Logger
- Attributes
- protected
- Definition Classes
- Logging
- def logDebug(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logDebug(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logError(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logError(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logInfo(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logInfo(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logName: String
- Attributes
- protected
- Definition Classes
- Logging
- def logTrace(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logTrace(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logWarning(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logWarning(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def map[U](f: (InternalRow) => U)(implicit arg0: ClassTag[U]): RDD[U]
- Definition Classes
- RDD
- def mapPartitions[U](f: (Iterator[InternalRow]) => Iterator[U], preservesPartitioning: Boolean)(implicit arg0: ClassTag[U]): RDD[U]
- Definition Classes
- RDD
- def mapPartitionsWithIndex[U](f: (Int, Iterator[InternalRow]) => Iterator[U], preservesPartitioning: Boolean)(implicit arg0: ClassTag[U]): RDD[U]
- Definition Classes
- RDD
- def max()(implicit ord: Ordering[InternalRow]): InternalRow
- Definition Classes
- RDD
- val metadataColumns: Seq[AttributeReference]
- def min()(implicit ord: Ordering[InternalRow]): InternalRow
- Definition Classes
- RDD
- var name: String
- Definition Classes
- RDD
- final def ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- final def notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
- final def notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
- def parent[U](j: Int)(implicit arg0: ClassTag[U]): RDD[U]
- Attributes
- protected[spark]
- Definition Classes
- RDD
- val partitioner: Option[Partitioner]
- Definition Classes
- RDD
- final def partitions: Array[Partition]
- Definition Classes
- RDD
- def persist(): FileScanRDD.this.type
- Definition Classes
- RDD
- def persist(newLevel: StorageLevel): FileScanRDD.this.type
- Definition Classes
- RDD
- def pipe(command: Seq[String], env: Map[String, String], printPipeContext: ((String) => Unit) => Unit, printRDDElement: (InternalRow, (String) => Unit) => Unit, separateWorkingDir: Boolean, bufferSize: Int, encoding: String): RDD[String]
- Definition Classes
- RDD
- def pipe(command: String, env: Map[String, String]): RDD[String]
- Definition Classes
- RDD
- def pipe(command: String): RDD[String]
- Definition Classes
- RDD
- final def preferredLocations(split: Partition): Seq[String]
- Definition Classes
- RDD
- def randomSplit(weights: Array[Double], seed: Long): Array[RDD[InternalRow]]
- Definition Classes
- RDD
- val readDataSchema: StructType
- def reduce(f: (InternalRow, InternalRow) => InternalRow): InternalRow
- Definition Classes
- RDD
- def repartition(numPartitions: Int)(implicit ord: Ordering[InternalRow]): RDD[InternalRow]
- Definition Classes
- RDD
- def sample(withReplacement: Boolean, fraction: Double, seed: Long): RDD[InternalRow]
- Definition Classes
- RDD
- def saveAsObjectFile(path: String): Unit
- Definition Classes
- RDD
- def saveAsTextFile(path: String, codec: Class[_ <: CompressionCodec]): Unit
- Definition Classes
- RDD
- def saveAsTextFile(path: String): Unit
- Definition Classes
- RDD
- def setName(_name: String): FileScanRDD.this.type
- Definition Classes
- RDD
- def sortBy[K](f: (InternalRow) => K, ascending: Boolean, numPartitions: Int)(implicit ord: Ordering[K], ctag: ClassTag[K]): RDD[InternalRow]
- Definition Classes
- RDD
- def sparkContext: SparkContext
- Definition Classes
- RDD
- def subtract(other: RDD[InternalRow], p: Partitioner)(implicit ord: Ordering[InternalRow]): RDD[InternalRow]
- Definition Classes
- RDD
- def subtract(other: RDD[InternalRow], numPartitions: Int): RDD[InternalRow]
- Definition Classes
- RDD
- def subtract(other: RDD[InternalRow]): RDD[InternalRow]
- Definition Classes
- RDD
- final def synchronized[T0](arg0: => T0): T0
- Definition Classes
- AnyRef
- def take(num: Int): Array[InternalRow]
- Definition Classes
- RDD
- def takeOrdered(num: Int)(implicit ord: Ordering[InternalRow]): Array[InternalRow]
- Definition Classes
- RDD
- def takeSample(withReplacement: Boolean, num: Int, seed: Long): Array[InternalRow]
- Definition Classes
- RDD
- def toDebugString: String
- Definition Classes
- RDD
- def toJavaRDD(): JavaRDD[InternalRow]
- Definition Classes
- RDD
- def toLocalIterator: Iterator[InternalRow]
- Definition Classes
- RDD
- def toString(): String
- Definition Classes
- RDD → AnyRef → Any
- def top(num: Int)(implicit ord: Ordering[InternalRow]): Array[InternalRow]
- Definition Classes
- RDD
- def treeAggregate[U](zeroValue: U, seqOp: (U, InternalRow) => U, combOp: (U, U) => U, depth: Int, finalAggregateOnExecutor: Boolean)(implicit arg0: ClassTag[U]): U
- Definition Classes
- RDD
- def treeAggregate[U](zeroValue: U)(seqOp: (U, InternalRow) => U, combOp: (U, U) => U, depth: Int)(implicit arg0: ClassTag[U]): U
- Definition Classes
- RDD
- def treeReduce(f: (InternalRow, InternalRow) => InternalRow, depth: Int): InternalRow
- Definition Classes
- RDD
- def union(other: RDD[InternalRow]): RDD[InternalRow]
- Definition Classes
- RDD
- def unpersist(blocking: Boolean): FileScanRDD.this.type
- Definition Classes
- RDD
- final def wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException])
- final def wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException])
- final def wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException]) @native()
- def withResources(rp: ResourceProfile): FileScanRDD.this.type
- Definition Classes
- RDD
- Annotations
- @Experimental() @Since("3.1.0")
- def zip[U](other: RDD[U])(implicit arg0: ClassTag[U]): RDD[(InternalRow, U)]
- Definition Classes
- RDD
- def zipPartitions[B, C, D, V](rdd2: RDD[B], rdd3: RDD[C], rdd4: RDD[D])(f: (Iterator[InternalRow], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[D], arg3: ClassTag[V]): RDD[V]
- Definition Classes
- RDD
- def zipPartitions[B, C, D, V](rdd2: RDD[B], rdd3: RDD[C], rdd4: RDD[D], preservesPartitioning: Boolean)(f: (Iterator[InternalRow], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[D], arg3: ClassTag[V]): RDD[V]
- Definition Classes
- RDD
- def zipPartitions[B, C, V](rdd2: RDD[B], rdd3: RDD[C])(f: (Iterator[InternalRow], Iterator[B], Iterator[C]) => Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[V]): RDD[V]
- Definition Classes
- RDD
- def zipPartitions[B, C, V](rdd2: RDD[B], rdd3: RDD[C], preservesPartitioning: Boolean)(f: (Iterator[InternalRow], Iterator[B], Iterator[C]) => Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[V]): RDD[V]
- Definition Classes
- RDD
- def zipPartitions[B, V](rdd2: RDD[B])(f: (Iterator[InternalRow], Iterator[B]) => Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[V]): RDD[V]
- Definition Classes
- RDD
- def zipPartitions[B, V](rdd2: RDD[B], preservesPartitioning: Boolean)(f: (Iterator[InternalRow], Iterator[B]) => Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[V]): RDD[V]
- Definition Classes
- RDD
- def zipWithIndex(): RDD[(InternalRow, Long)]
- Definition Classes
- RDD
- def zipWithUniqueId(): RDD[(InternalRow, Long)]
- Definition Classes
- RDD