Spark Project SQL 3.0.1 API - org.apache.spark.sql.execution.streaming.StreamingDeduplicateExec

final def !=(arg0: Any): Boolean

Definition Classes: AnyRef → Any

final def ##(): Int

Definition Classes: AnyRef → Any

final def ==(arg0: Any): Boolean

Definition Classes: AnyRef → Any

lazy val allAttributes: AttributeSeq

Definition Classes: QueryPlan

def apply(number: Int): TreeNode[_]

Definition Classes: TreeNode

def argString(maxFields: Int): String

Definition Classes: TreeNode

def asCode: String

Definition Classes: TreeNode

final def asInstanceOf[T0]: T0

Definition Classes: Any

final lazy val canonicalized: SparkPlan

Definition Classes: QueryPlan
Annotations: @transient()

val child: SparkPlan

Definition Classes: StreamingDeduplicateExec → UnaryExecNode

final def children: Seq[SparkPlan]

Definition Classes: UnaryExecNode → TreeNode

def cleanupResources(): Unit

Cleans up the resources used by the physical operator (if any).

Cleans up the resources used by the physical operator (if any). In general, all the resources should be cleaned up when the task finishes but operators like SortMergeJoinExec and LimitExec may want eager cleanup to free up tight resources (e.g., memory).

Attributes: protected[sql]
Definition Classes: SparkPlan

def clone(): SparkPlan

Definition Classes: TreeNode → AnyRef

def collect[B](pf: PartialFunction[SparkPlan, B]): Seq[B]

Definition Classes: TreeNode

def collectFirst[B](pf: PartialFunction[SparkPlan, B]): Option[B]

Definition Classes: TreeNode

def collectLeaves(): Seq[SparkPlan]

Definition Classes: TreeNode

def collectWithSubqueries[B](f: PartialFunction[SparkPlan, B]): Seq[B]

Definition Classes: QueryPlan

def conf: SQLConf

Definition Classes: QueryPlan

lazy val containsChild: Set[TreeNode[_]]

Definition Classes: TreeNode

def copyTagsFrom(other: SparkPlan): Unit

Attributes: protected
Definition Classes: TreeNode

def doCanonicalize(): SparkPlan

Attributes: protected
Definition Classes: QueryPlan

def doExecute(): RDD[InternalRow]

Produces the result of the query as an RDD[InternalRow].

Overridden by concrete implementations of SparkPlan.

Attributes: protected
Definition Classes: StreamingDeduplicateExec → SparkPlan

def doExecuteBroadcast[T](): Broadcast[T]

Produces the result of the query as a broadcast variable.

Overridden by concrete implementations of SparkPlan.

Attributes: protected[sql]
Definition Classes: SparkPlan

def doExecuteColumnar(): RDD[ColumnarBatch]

Produces the result of the query as an RDD[ColumnarBatch] if supportsColumnar returns true.

Produces the result of the query as an RDD[ColumnarBatch] if supportsColumnar returns true. By convention the executor that creates a ColumnarBatch is responsible for closing it when it is no longer needed. This allows input formats to be able to reuse batches if needed.

Attributes: protected
Definition Classes: SparkPlan

def doPrepare(): Unit

Overridden by concrete implementations of SparkPlan.

Overridden by concrete implementations of SparkPlan. It is guaranteed to run before any execute of SparkPlan. This is helpful if we want to set up some state before executing the query, e.g., BroadcastHashJoin uses it to broadcast asynchronously.

Attributes: protected
Definition Classes: SparkPlan
Note: The prepare method has already walked down the tree, so the implementation doesn't have to call the children's prepare methods. This will only be called once, protected by `this`.

final def eq(arg0: AnyRef): Boolean

Definition Classes: AnyRef

val eventTimeWatermark: Option[Long]

The watermark value.

Definition Classes: StreamingDeduplicateExec → WatermarkSupport

final def execute(): RDD[InternalRow]

Returns the result of this query as an RDD[InternalRow] by delegating to doExecute after preparations.

Concrete implementations of SparkPlan should override doExecute.

Definition Classes: SparkPlan

final def executeBroadcast[T](): Broadcast[T]

Returns the result of this query as a broadcast variable by delegating to doExecuteBroadcast after preparations.

Concrete implementations of SparkPlan should override doExecuteBroadcast.

Definition Classes: SparkPlan

def executeCollect(): Array[InternalRow]

Runs this query returning the result as an array.

Definition Classes: SparkPlan

def executeCollectPublic(): Array[Row]

Runs this query returning the result as an array, using external Row format.

Definition Classes: SparkPlan

final def executeColumnar(): RDD[ColumnarBatch]

Returns the result of this query as an RDD[ColumnarBatch] by delegating to doExecuteColumnar after preparations.

Concrete implementations of SparkPlan should override doExecuteColumnar if supportsColumnar returns true.

Definition Classes: SparkPlan

final def executeQuery[T](query: ⇒ T): T

Executes a query after preparing the query and adding query plan information to created RDDs for visualization.

Attributes: protected
Definition Classes: SparkPlan

def executeTail(n: Int): Array[InternalRow]

Runs this query returning the last n rows as an array.

This is modeled after RDD.take but never runs any job locally on the driver.

Definition Classes: SparkPlan

def executeTake(n: Int): Array[InternalRow]

Runs this query returning the first n rows as an array.

This is modeled after RDD.take but never runs any job locally on the driver.

Definition Classes: SparkPlan

def executeToIterator(): Iterator[InternalRow]

Runs this query returning the result as an iterator of InternalRow.

Definition Classes: SparkPlan
Note: Triggers multiple jobs (one for each partition).

final def expressions: Seq[Expression]

Definition Classes: QueryPlan

def fastEquals(other: TreeNode[_]): Boolean

Definition Classes: TreeNode

def finalize(): Unit

Attributes: protected[lang]
Definition Classes: AnyRef
Annotations: @throws( classOf[java.lang.Throwable] )

def find(f: (SparkPlan) ⇒ Boolean): Option[SparkPlan]

Definition Classes: TreeNode

def flatMap[A](f: (SparkPlan) ⇒ TraversableOnce[A]): Seq[A]

Definition Classes: TreeNode

def foreach(f: (SparkPlan) ⇒ Unit): Unit

Definition Classes: TreeNode

def foreachUp(f: (SparkPlan) ⇒ Unit): Unit

Definition Classes: TreeNode

def formattedNodeName: String

Attributes: protected
Definition Classes: QueryPlan

def generateTreeString(depth: Int, lastChildren: Seq[Boolean], append: (String) ⇒ Unit, verbose: Boolean, prefix: String, addSuffix: Boolean, maxFields: Int, printNodeId: Boolean): Unit

Definition Classes: TreeNode

final def getClass(): Class[_]

Definition Classes: AnyRef → Any
Annotations: @native()

def getProgress(): StateOperatorProgress

Get the progress made by this stateful operator after execution.

Get the progress made by this stateful operator after execution. This should be called in the driver after this SparkPlan has been executed and metrics have been updated.

Definition Classes: StateStoreWriter

def getStateInfo: StatefulOperatorStateInfo

Attributes: protected
Definition Classes: StatefulOperator

def getTagValue[T](tag: TreeNodeTag[T]): Option[T]

Definition Classes: TreeNode

def hashCode(): Int

Definition Classes: TreeNode → AnyRef → Any

val id: Int

Definition Classes: SparkPlan

def initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean

Attributes: protected
Definition Classes: Logging

def initializeLogIfNecessary(isInterpreter: Boolean): Unit

Attributes: protected
Definition Classes: Logging

def innerChildren: Seq[QueryPlan[_]]

Definition Classes: QueryPlan → TreeNode

def inputSet: AttributeSet

Definition Classes: QueryPlan

def isCanonicalizedPlan: Boolean

Attributes: protected
Definition Classes: QueryPlan

final def isInstanceOf[T0]: Boolean

Definition Classes: Any

def isTraceEnabled(): Boolean

Attributes: protected
Definition Classes: Logging

def jsonFields: List[JField]

Attributes: protected
Definition Classes: TreeNode

val keyExpressions: Seq[Attribute]

The keys that may have a watermark attribute.

Definition Classes: StreamingDeduplicateExec → WatermarkSupport

def log: Logger

Attributes: protected
Definition Classes: Logging

def logDebug(msg: ⇒ String, throwable: Throwable): Unit

Attributes: protected
Definition Classes: Logging

def logDebug(msg: ⇒ String): Unit

Attributes: protected
Definition Classes: Logging

def logError(msg: ⇒ String, throwable: Throwable): Unit

Attributes: protected
Definition Classes: Logging

def logError(msg: ⇒ String): Unit

Attributes: protected
Definition Classes: Logging

def logInfo(msg: ⇒ String, throwable: Throwable): Unit

Attributes: protected
Definition Classes: Logging

def logInfo(msg: ⇒ String): Unit

Attributes: protected
Definition Classes: Logging

def logName: String

Attributes: protected
Definition Classes: Logging

def logTrace(msg: ⇒ String, throwable: Throwable): Unit

Attributes: protected
Definition Classes: Logging

def logTrace(msg: ⇒ String): Unit

Attributes: protected
Definition Classes: Logging

def logWarning(msg: ⇒ String, throwable: Throwable): Unit

Attributes: protected
Definition Classes: Logging

def logWarning(msg: ⇒ String): Unit

Attributes: protected
Definition Classes: Logging

def logicalLink: Option[LogicalPlan]

returns: The logical plan this plan is linked to.

Definition Classes: SparkPlan

def longMetric(name: String): SQLMetric

returns: SQLMetric for the name.

Definition Classes: SparkPlan

def makeCopy(newArgs: Array[AnyRef]): SparkPlan

Overridden makeCopy that also propagates the sqlContext to the copied plan.

Definition Classes: SparkPlan → TreeNode

def map[A](f: (SparkPlan) ⇒ A): Seq[A]

Definition Classes: TreeNode

def mapChildren(f: (SparkPlan) ⇒ SparkPlan): SparkPlan

Definition Classes: TreeNode

def mapExpressions(f: (Expression) ⇒ Expression): StreamingDeduplicateExec.this.type

Definition Classes: QueryPlan

def mapProductIterator[B](f: (Any) ⇒ B)(implicit arg0: ClassTag[B]): Array[B]

Attributes: protected
Definition Classes: TreeNode

lazy val metrics: Map[String, SQLMetric]

returns: All metrics containing metrics of this SparkPlan.

Definition Classes: StateStoreWriter → SparkPlan

final def missingInput: AttributeSet

Definition Classes: QueryPlan

final def ne(arg0: AnyRef): Boolean

Definition Classes: AnyRef

def nodeName: String

Definition Classes: TreeNode

final def notify(): Unit

Definition Classes: AnyRef
Annotations: @native()

final def notifyAll(): Unit

Definition Classes: AnyRef
Annotations: @native()

def numberedTreeString: String

Definition Classes: TreeNode

val origin: Origin

Definition Classes: TreeNode

def otherCopyArgs: Seq[AnyRef]

Attributes: protected
Definition Classes: TreeNode

def output: Seq[Attribute]

Definition Classes: StreamingDeduplicateExec → QueryPlan

def outputOrdering: Seq[SortOrder]

Specifies how data is ordered in each partition.

Definition Classes: SparkPlan

def outputPartitioning: Partitioning

Specifies how data is partitioned across different nodes in the cluster.

Definition Classes: StreamingDeduplicateExec → SparkPlan

lazy val outputSet: AttributeSet

Definition Classes: QueryPlan
Annotations: @transient()

def p(number: Int): SparkPlan

Definition Classes: TreeNode

final def prepare(): Unit

Prepares this SparkPlan for execution.

Prepares this SparkPlan for execution. It's idempotent.

Definition Classes: SparkPlan

def prepareSubqueries(): Unit

Finds scalar subquery expressions in this plan node and starts evaluating them.

Attributes: protected
Definition Classes: SparkPlan

def prettyJson: String

Definition Classes: TreeNode

def printSchema(): Unit

Definition Classes: QueryPlan

def producedAttributes: AttributeSet

Definition Classes: QueryPlan

lazy val references: AttributeSet

Definition Classes: QueryPlan
Annotations: @transient()

def removeKeysOlderThanWatermark(storeManager: StreamingAggregationStateManager, store: StateStore): Unit

Attributes: protected
Definition Classes: WatermarkSupport

def removeKeysOlderThanWatermark(store: StateStore): Unit

Attributes: protected
Definition Classes: WatermarkSupport

def requiredChildDistribution: Seq[Distribution]

Distribute by grouping attributes.

Definition Classes: StreamingDeduplicateExec → SparkPlan

def requiredChildOrdering: Seq[Seq[SortOrder]]

Specifies the sort order this operator requires of the input data within each partition.

Definition Classes: SparkPlan

def resetMetrics(): Unit

Resets all the metrics.

Definition Classes: SparkPlan

final def sameResult(other: SparkPlan): Boolean

Definition Classes: QueryPlan

lazy val schema: StructType

Definition Classes: QueryPlan

def schemaString: String

Definition Classes: QueryPlan

final def semanticHash(): Int

Definition Classes: QueryPlan

def setLogicalLink(logicalPlan: LogicalPlan): Unit

Set logical plan link recursively if unset.

Definition Classes: SparkPlan

def setStoreMetrics(store: StateStore): Unit

Set the SQL metrics related to the state store.

Set the SQL metrics related to the state store. This should be called in that task after the store has been updated.

Attributes: protected
Definition Classes: StateStoreWriter

def setTagValue[T](tag: TreeNodeTag[T], value: T): Unit

Definition Classes: TreeNode

def shouldRunAnotherBatch(newMetadata: OffsetSeqMetadata): Boolean

Returns whether the MicroBatchExecution should run another batch, based on this stateful operator and the current updated metadata.

Definition Classes: StreamingDeduplicateExec → StateStoreWriter

def simpleString(maxFields: Int): String

Definition Classes: QueryPlan → TreeNode

def simpleStringWithNodeId(): String

Definition Classes: QueryPlan → TreeNode

def sparkContext: SparkContext

Attributes: protected
Definition Classes: SparkPlan

final val sqlContext: SQLContext

A handle to the SQL Context that was used to create this plan.

A handle to the SQL Context that was used to create this plan. Since many operators need access to the sqlContext for RDD operations or configuration this field is automatically populated by the query planning infrastructure.

Definition Classes: SparkPlan

val stateInfo: Option[StatefulOperatorStateInfo]

Definition Classes: StreamingDeduplicateExec → StatefulOperator

def statePrefix: String

Attributes: protected
Definition Classes: QueryPlan

def stringArgs: Iterator[Any]

Attributes: protected
Definition Classes: TreeNode

def subqueries: Seq[SparkPlan]

Definition Classes: QueryPlan

def subqueriesAll: Seq[SparkPlan]

Definition Classes: QueryPlan

def supportsColumnar: Boolean

Return true if this stage of the plan supports columnar execution.

Definition Classes: SparkPlan

final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes: AnyRef

def timeTakenMs(body: ⇒ Unit): Long

Records the duration of running body for the next query progress update.

Attributes: protected
Definition Classes: StateStoreWriter

def toJSON: String

Definition Classes: TreeNode

def toString(): String

Definition Classes: TreeNode → AnyRef → Any

def transform(rule: PartialFunction[SparkPlan, SparkPlan]): SparkPlan

Definition Classes: TreeNode

def transformAllExpressions(rule: PartialFunction[Expression, Expression]): StreamingDeduplicateExec.this.type

Definition Classes: QueryPlan

def transformDown(rule: PartialFunction[SparkPlan, SparkPlan]): SparkPlan

Definition Classes: TreeNode

def transformExpressions(rule: PartialFunction[Expression, Expression]): StreamingDeduplicateExec.this.type

Definition Classes: QueryPlan

def transformExpressionsDown(rule: PartialFunction[Expression, Expression]): StreamingDeduplicateExec.this.type

Definition Classes: QueryPlan

def transformExpressionsUp(rule: PartialFunction[Expression, Expression]): StreamingDeduplicateExec.this.type

Definition Classes: QueryPlan

def transformUp(rule: PartialFunction[SparkPlan, SparkPlan]): SparkPlan

Definition Classes: TreeNode

def treeString(append: (String) ⇒ Unit, verbose: Boolean, addSuffix: Boolean, maxFields: Int, printOperatorId: Boolean): Unit

Definition Classes: TreeNode

final def treeString(verbose: Boolean, addSuffix: Boolean, maxFields: Int, printOperatorId: Boolean): String

Definition Classes: TreeNode

final def treeString: String

Definition Classes: TreeNode

def unsetTagValue[T](tag: TreeNodeTag[T]): Unit

Definition Classes: TreeNode

def vectorTypes: Option[Seq[String]]

The exact java types of the columns that are output in columnar processing mode.

The exact java types of the columns that are output in columnar processing mode. This is a performance optimization for code generation and is optional.

Definition Classes: SparkPlan

def verboseString(maxFields: Int): String

Definition Classes: QueryPlan → TreeNode

def verboseStringWithOperatorId(): String

Definition Classes: UnaryExecNode → QueryPlan

def verboseStringWithSuffix(maxFields: Int): String

Definition Classes: TreeNode

final def wait(): Unit

Definition Classes: AnyRef
Annotations: @throws( ... )

final def wait(arg0: Long, arg1: Int): Unit

Definition Classes: AnyRef
Annotations: @throws( ... )

final def wait(arg0: Long): Unit

Definition Classes: AnyRef
Annotations: @throws( ... ) @native()

def waitForSubqueries(): Unit

Blocks the thread until all subqueries finish evaluation and update the results.

Attributes: protected
Definition Classes: SparkPlan

lazy val watermarkExpression: Option[Expression]

Generates an expression that matches data older than the watermark.

Definition Classes: WatermarkSupport

lazy val watermarkPredicateForData: Option[BasePredicate]

Predicate based on the child output that matches data older than the watermark.

Definition Classes: WatermarkSupport

lazy val watermarkPredicateForKeys: Option[BasePredicate]

Predicate based on keys that matches data older than the watermark.

Definition Classes: WatermarkSupport

def withNewChildren(newChildren: Seq[SparkPlan]): SparkPlan

Definition Classes: TreeNode

Packages

StreamingDeduplicateExec

Companion object StreamingDeduplicateExec

case class StreamingDeduplicateExec(keyExpressions: Seq[Attribute], child: SparkPlan, stateInfo: Option[StatefulOperatorStateInfo] = None, eventTimeWatermark: Option[Long] = None) extends SparkPlan with UnaryExecNode with StateStoreWriter with WatermarkSupport with Product with Serializable

Instance Constructors

Value Members

Inherited from WatermarkSupport

Inherited from StateStoreWriter

Inherited from StatefulOperator

Inherited from UnaryExecNode

Inherited from SparkPlan

Inherited from Serializable

Inherited from Serializable

Inherited from Logging

Inherited from QueryPlan[SparkPlan]

Inherited from TreeNode[SparkPlan]

Inherited from Product

Inherited from Equals

Inherited from AnyRef

Inherited from Any

Ungrouped

Packages

StreamingDeduplicateExec 

Companion object StreamingDeduplicateExec

case class StreamingDeduplicateExec(keyExpressions: Seq[Attribute], child: SparkPlan, stateInfo: Option[StatefulOperatorStateInfo] = None, eventTimeWatermark: Option[Long] = None) extends SparkPlan with UnaryExecNode with StateStoreWriter with WatermarkSupport with Product with Serializable

Instance Constructors

Value Members

Inherited from WatermarkSupport

Inherited from StateStoreWriter

Inherited from StatefulOperator

Inherited from UnaryExecNode

Inherited from SparkPlan

Inherited from Serializable

Inherited from Serializable

Inherited from Logging

Inherited from QueryPlan[SparkPlan]

Inherited from TreeNode[SparkPlan]

Inherited from Product

Inherited from Equals

Inherited from AnyRef

Inherited from Any

Ungrouped

StreamingDeduplicateExec