class MicroBatchExecution extends StreamExecution with AsyncLogPurge
Inheritance:
- MicroBatchExecution
- AsyncLogPurge
- StreamExecution
- Logging
- StreamingQuery
- StreamingQuery
- AnyRef
- Any
Instance Constructors
- new MicroBatchExecution(sparkSession: classic.SparkSession, trigger: Trigger, triggerClock: Clock, extraOptions: Map[String, String], plan: WriteToStream)
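MicroBatchExecution is internal to the engine; applications obtain one indirectly by starting a query with a micro-batch trigger. A minimal sketch using only the public API (the rate source and console sink are just illustrative choices):

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.streaming.Trigger

    val spark = SparkSession.builder().master("local[2]").appName("demo").getOrCreate()
    val query = spark.readStream
      .format("rate")                               // built-in testing source
      .load()
      .writeStream
      .format("console")
      .trigger(Trigger.ProcessingTime("5 seconds")) // selects micro-batch execution
      .start()                                      // the engine constructs the MicroBatchExecution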
Type Members
- implicit class LogStringContext extends AnyRef
- Definition Classes
- Logging
Value Members
- final def !=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- final def ##: Int
- Definition Classes
- AnyRef → Any
- final def ==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- val analyzedPlan: LogicalPlan
- Definition Classes
- StreamExecution
- final def asInstanceOf[T0]: T0
- Definition Classes
- Any
- def asyncLogPurgeShutdown(): Unit
- Attributes
- protected
- Definition Classes
- AsyncLogPurge
- def availableOffsets: StreamProgress
Get the end offsets (formerly known as "available" offsets) of the latest batch that has been planned.
- Definition Classes
- StreamExecution
- def awaitInitialization(timeoutMs: Long): Unit
Await until all fields of the query have been initialized.
- Definition Classes
- StreamExecution
- val awaitProgressLock: ReentrantLock
A lock used to wait/notify when batches complete. Use a fair lock to avoid thread starvation.
- Attributes
- protected
- Definition Classes
- StreamExecution
- val awaitProgressLockCondition: Condition
- Attributes
- protected
- Definition Classes
- StreamExecution
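A minimal sketch of the fair-lock/condition pattern these two members implement; the names below are illustrative, not the actual fields:

    import java.util.concurrent.locks.ReentrantLock

    val awaitLock = new ReentrantLock(true) // fair = true avoids thread starvation
    val batchDone = awaitLock.newCondition()

    def signalBatchCompletion(): Unit = {
      awaitLock.lock()
      try batchDone.signalAll()             // wake threads waiting for batch completion
      finally awaitLock.unlock()
    }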
- def awaitTermination(timeoutMs: Long): Boolean
- Definition Classes
- StreamExecution → StreamingQuery
- def awaitTermination(): Unit
- Definition Classes
- StreamExecution → StreamingQuery
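Typical use from application code, via the public StreamingQuery handle returned by start():

    // Block for up to 30 seconds; returns true if the query terminated in time.
    if (!query.awaitTermination(30000L)) {
      // still running after the timeout; keep waiting or stop the query
    }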
- def checkpointFile(name: String): String
Returns the path of a file with `name` in the checkpoint directory.
- Attributes
- protected
- Definition Classes
- StreamExecution
- def cleanUpLastExecutedMicroBatch(execCtx: MicroBatchExecutionContext): Unit
- Attributes
- protected
- def cleanup(): Unit
Any cleanup that needs to happen when the query is stopped or exits.
- Definition Classes
- MicroBatchExecution → StreamExecution
- def clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.CloneNotSupportedException]) @IntrinsicCandidate() @native()
- val commitLog: CommitLog
A log that records the batch ids that have completed. This is used to check whether a batch was fully processed and its output committed to the sink, so there is no need to process it again; for instance during restart, it helps identify which batch to run next.
- Definition Classes
- StreamExecution
- def commitSources(offsetSeq: OffsetSeq): Unit
- Attributes
- protected
- var committedOffsets: StreamProgress
Tracks how much data we have processed and committed to the sink or state store from each input source. Only the scheduler thread should modify this field, and only in atomic steps. Other threads should make a shallow copy if they are going to access this field more than once, since the field's value may change at any time.
- Definition Classes
- StreamExecution
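The committed/available ranges surface per source through the public progress API, for example:

    // Each SourceProgress reports the offset range of the most recent batch.
    Option(query.lastProgress).foreach { progress =>
      progress.sources.foreach { s =>
        println(s"${s.description}: start=${s.startOffset} end=${s.endOffset}")
      }
    }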
- def createWrite(table: SupportsWrite, options: Map[String, String], inputPlan: LogicalPlan): Write
- Attributes
- protected
- Definition Classes
- StreamExecution
- final def eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- def equals(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef → Any
- val errorNotifier: ErrorNotifier
- Attributes
- protected[sql]
- Definition Classes
- MicroBatchExecution → AsyncLogPurge
- def exception: Option[StreamingQueryException]
Returns the StreamingQueryException if the query was terminated by an exception.
- Definition Classes
- StreamExecution → StreamingQuery
- def explain(): Unit
- Definition Classes
- StreamExecution → StreamingQuery
- def explain(extended: Boolean): Unit
- Definition Classes
- StreamExecution → StreamingQuery
- def explainInternal(extended: Boolean): String
Exposed for tests.
- Definition Classes
- StreamExecution
- def getBatchDescriptionString: String
- Attributes
- protected
- Definition Classes
- StreamExecution
- final def getClass(): Class[_ <: AnyRef]
- Definition Classes
- AnyRef → Any
- Annotations
- @IntrinsicCandidate() @native()
- def getLatestExecutionContext(): StreamExecutionContext
Get the latest execution context.
- Definition Classes
- MicroBatchExecution → StreamExecution
- def getStartOffsetsOfLatestBatch: StreamProgress
Get the start offsets of the latest batch that has been planned.
- Definition Classes
- StreamExecution
- def getTrigger(): TriggerExecutor
- Attributes
- protected
- def hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @IntrinsicCandidate() @native()
- val id: UUID
- Definition Classes
- StreamExecution → StreamingQuery
- def initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
- Attributes
- protected
- Definition Classes
- Logging
- def initializeLogIfNecessary(isInterpreter: Boolean): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def interruptAndAwaitExecutionThreadTermination(): Unit
Interrupts the query execution thread and awaits its termination until it exceeds the timeout. The timeout can be set via "spark.sql.streaming.stopTimeout".
- Attributes
- protected
- Definition Classes
- StreamExecution
- Annotations
- @throws(classOf[java.util.concurrent.TimeoutException])
- Exceptions thrown
TimeoutException
If the thread cannot be stopped within the timeout
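Since "spark.sql.streaming.stopTimeout" is an ordinary SQL conf, it can be raised before starting queries whose sources shut down slowly:

    // Allow the execution thread up to 60 seconds to stop.
    spark.conf.set("spark.sql.streaming.stopTimeout", "60s")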
- def isActive: Boolean
Whether the query is currently active or not.
- Definition Classes
- StreamExecution → StreamingQuery
- final def isInstanceOf[T0]: Boolean
- Definition Classes
- Any
- def isTraceEnabled(): Boolean
- Attributes
- protected
- Definition Classes
- Logging
- def lastExecution: IncrementalExecution
- Definition Classes
- StreamExecution
- def lastProgress: StreamingQueryProgress
- Definition Classes
- StreamExecution → StreamingQuery
- def latestOffsets: StreamProgress
- Definition Classes
- StreamExecution
- def log: Logger
- Attributes
- protected
- Definition Classes
- Logging
- def logDebug(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logDebug(entry: LogEntry, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logDebug(entry: LogEntry): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logDebug(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logError(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logError(entry: LogEntry, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logError(entry: LogEntry): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logError(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logInfo(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logInfo(entry: LogEntry, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logInfo(entry: LogEntry): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logInfo(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logName: String
- Attributes
- protected
- Definition Classes
- Logging
- def logTrace(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logTrace(entry: LogEntry, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logTrace(entry: LogEntry): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logTrace(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logWarning(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logWarning(entry: LogEntry, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logWarning(entry: LogEntry): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logWarning(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- var loggingThreadContext: Instance
- Attributes
- protected
- Definition Classes
- StreamExecution
- lazy val logicalPlan: LogicalPlan
The base logical plan which will be used across batch runs. Once the value is set, it should not be modified.
- Definition Classes
- MicroBatchExecution → StreamExecution
- def markMicroBatchEnd(execCtx: MicroBatchExecutionContext): Unit
Called after the micro-batch has completed execution. It takes care of committing the offset to the commit log and other bookkeeping.
- Attributes
- protected
- def markMicroBatchExecutionStart(execCtx: MicroBatchExecutionContext): Unit
Method called once after planning is done and before the start of the micro-batch execution. It can be used to perform any pre-execution tasks.
- Attributes
- protected
- def markMicroBatchStart(execCtx: MicroBatchExecutionContext): Unit
Called at the start of the micro-batch with the given offsets. It takes care of offset checkpointing to the offset log and any micro-batch startup tasks.
- Attributes
- protected
- val minLogEntriesToMaintain: Int
- Attributes
- protected
- Definition Classes
- StreamExecution
- val name: String
- Definition Classes
- StreamExecution → StreamingQuery
- final def ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- var noNewData: Boolean
A flag to indicate that a batch has completed with no new data available.
- Attributes
- protected
- Definition Classes
- StreamExecution
- final def notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @IntrinsicCandidate() @native()
- final def notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @IntrinsicCandidate() @native()
- val offsetLog: OffsetSeqLog
A write-ahead log that records the offsets that are present in each batch. To ensure that a given batch will always consist of the same data, we write to this log *before* any processing is done. Thus, the Nth record in this log indicates data that is currently being processed, and the N-1th entry indicates which offsets have been durably committed to the sink.
- Definition Classes
- StreamExecution
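A hedged sketch of that write-ahead contract, assuming the HDFSMetadataLog-style add(batchId, metadata) signature used by Spark's internal metadata logs:

    // Persist the planned end offsets for batch N *before* processing it, so a
    // restart can re-run batch N over exactly the same input range.
    if (offsetLog.add(batchId, offsetSeq)) {
      // offsets durably recorded; safe to start executing this batch
    } else {
      // an entry for `batchId` already exists (e.g. written by a previous run)
    }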
- val outputMode: OutputMode
- Definition Classes
- StreamExecution
- val pollingDelayMs: Long
- Attributes
- protected
- Definition Classes
- StreamExecution
- def populateStartOffsets(execCtx: MicroBatchExecutionContext, sparkSessionToRunBatches: classic.SparkSession): Unit
Populate the start offsets to start the execution at the current offsets stored in the sink (i.e. avoid reprocessing data that we have already processed). This function must be called before any processing occurs and will populate the following fields in the execution context of this micro-batch:
- batchId
- startOffset
- endOffsets
The basic structure of this method is as follows:

    Identify (from the offset log) the offsets used to run the last batch
    IF last batch exists THEN
      Set the next batch to be executed as the last recovered batch
      Check the commit log to see which batch was committed last
      IF the last batch was committed THEN
        Call getBatch using the last batch start and end offsets
        // ^^^^ above line is needed since some sources assume last batch always re-executes
        Setup for a new batch i.e., start = last batch end, and identify new end
      DONE
    ELSE
      Identify a brand new batch
    DONE
- Attributes
- protected
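A hedged Scala sketch of the recovery decision above, assuming the HDFSMetadataLog-style getLatest() signature on both offsetLog and commitLog:

    val lastPlanned: Option[Long]   = offsetLog.getLatest().map(_._1)
    val lastCommitted: Option[Long] = commitLog.getLatest().map(_._1)
    val nextBatchId = (lastPlanned, lastCommitted) match {
      case (Some(p), Some(c)) if p == c => p + 1 // last batch committed: set up a new one
      case (Some(p), _)                 => p     // re-run the last planned batch
      case (None, _)                    => 0L    // empty offset log: brand new query
    }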
- def postEvent(event: Event): Unit
- Attributes
- protected
- Definition Classes
- StreamExecution
- val prettyIdString: String
Pretty identifier string for printing in logs. Format: if name is set, "queryName [id = xyz, runId = abc]"; otherwise "[id = xyz, runId = abc]".
- Attributes
- protected
- Definition Classes
- StreamExecution
- def processAllAvailable(): Unit
- Definition Classes
- StreamExecution → StreamingQuery
- val progressReporter: ProgressReporter
- Attributes
- protected
- Definition Classes
- StreamExecution
- def purge(threshold: Long): Unit
- Attributes
- protected
- Definition Classes
- StreamExecution
- def purgeAsync(batchId: Long): Unit
- Attributes
- protected
- Definition Classes
- AsyncLogPurge
- def purgeStatefulMetadata(plan: SparkPlan): Unit
- Attributes
- protected
- Definition Classes
- StreamExecution
- def purgeStatefulMetadataAsync(plan: SparkPlan): Unit
- Attributes
- protected
- Definition Classes
- AsyncLogPurge
- val queryExecutionThread: QueryExecutionThread
The thread that runs the micro-batches of this stream. Note that this thread must be an org.apache.spark.util.UninterruptibleThread to work around KAFKA-1894: interrupting a running `KafkaConsumer` may cause an endless loop.
- Definition Classes
- StreamExecution
- def recentProgress: Array[StreamingQueryProgress]
- Definition Classes
- StreamExecution → StreamingQuery
- val resolvedCheckpointRoot: String
- Definition Classes
- StreamExecution
- def runActivatedStream(sparkSessionForStream: classic.SparkSession): Unit
Repeatedly attempts to run batches as data arrives.
- Attributes
- protected
- Definition Classes
- MicroBatchExecution → StreamExecution
- val runId: UUID
- Definition Classes
- StreamExecution → StreamingQuery
- def setLatestExecutionContext(ctx: StreamExecutionContext): Unit
We only set latestExecutionContext if the batch id is larger than the batch id of the current latestExecutionContext. This ensures we always track the latest execution context, i.e. we never set latestExecutionContext to an earlier / older batch.
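In effect (illustrative names, not the actual private fields):

    // Only advance, never rewind, the tracked context.
    if (ctx.batchId > latestExecutionContext.batchId) {
      latestExecutionContext = ctx
    }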
- val sink: Table
- Definition Classes
- StreamExecution
- var sources: Seq[SparkDataStream]
The list of stream instances which will be used across batch runs. Once the value is set, it should not be modified.
- Attributes
- protected
- Definition Classes
- MicroBatchExecution → StreamExecution
- val sparkSession: classic.SparkSession
- Definition Classes
- StreamExecution → StreamingQuery → StreamingQuery
- val sparkSessionForStream: classic.SparkSession
Isolated Spark session to run the batches with.
- Attributes
- protected
- Definition Classes
- StreamExecution
- def start(): Unit
Starts the execution. This returns only after the thread has started and QueryStartedEvent has been posted to all the listeners.
- Definition Classes
- StreamExecution
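Because start() returns only after QueryStartedEvent has been posted, a registered listener observes the event as part of query startup; for example:

    import org.apache.spark.sql.streaming.StreamingQueryListener
    import org.apache.spark.sql.streaming.StreamingQueryListener._

    spark.streams.addListener(new StreamingQueryListener {
      override def onQueryStarted(e: QueryStartedEvent): Unit =
        println(s"started: id=${e.id} runId=${e.runId}")
      override def onQueryProgress(e: QueryProgressEvent): Unit = ()
      override def onQueryTerminated(e: QueryTerminatedEvent): Unit = ()
    })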
- val state: AtomicReference[State]
Defines the internal state of execution.
- Attributes
- protected
- Definition Classes
- StreamExecution
- def status: StreamingQueryStatus
- Definition Classes
- StreamExecution → StreamingQuery
- def stop(): Unit
Signals to the thread executing micro-batches that it should stop running after the next batch. This method blocks until the thread stops running.
- Definition Classes
- MicroBatchExecution → StreamingQuery
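Because stop() blocks until the micro-batch thread exits, a finally block is a common place for it:

    try {
      query.awaitTermination()
    } finally {
      if (query.isActive) query.stop() // blocks until the execution thread stops
    }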
- def stopSources(): Unit
Stops all streaming sources safely.
- Attributes
- protected
- Definition Classes
- StreamExecution
- var streamDeathCause: StreamingQueryException
- Attributes
- protected
- Definition Classes
- StreamExecution
- val streamMetadata: StreamMetadata
Metadata associated with the whole query.
- Attributes
- protected
- Definition Classes
- StreamExecution
- lazy val streamMetrics: MetricsReporter
Used to report metrics to coda-hale. This uses id for easier tracking across restarts.
- Definition Classes
- StreamExecution
- final def synchronized[T0](arg0: => T0): T0
- Definition Classes
- AnyRef
- def toString(): String
- Definition Classes
- StreamExecution → AnyRef → Any
- val trigger: Trigger
- Definition Classes
- StreamExecution
- val triggerClock: Clock
- Definition Classes
- StreamExecution
- var triggerExecutor: TriggerExecutor
- Attributes
- protected[sql]
- var uniqueSources: Map[SparkDataStream, ReadLimit]
A list of unique sources in the query plan. This will be set when generating the logical plan.
- Attributes
- protected
- Definition Classes
- StreamExecution
- lazy val useAsyncPurge: Boolean
- Attributes
- protected
- Definition Classes
- AsyncLogPurge
- def validateOffsetLogAndGetPrevOffset(latestBatchId: Long): Option[OffsetSeq]
Conduct sanity checks on the offset log to make sure it is correct and as expected. Also returns the previous offset written to the offset log.
- latestBatchId
the batch id of the current micro batch
- returns
An option that contains the offset of the previously written batch
- final def wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException])
- final def wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException]) @native()
- final def wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException])
- val watermarkMsMap: Map[Int, Long]
A map of current watermarks, keyed by the position of the watermark operator in the physical plan.
This state is 'soft state', which does not affect the correctness and semantics of watermarks and is not persisted across query restarts. The fault-tolerant watermark state is in offsetSeqMetadata.
- Attributes
- protected
- Definition Classes
- StreamExecution
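Watermarks enter the plan through the public API; this map then tracks one current value per watermark operator. Assuming a streaming DataFrame `events` with an `eventTime` column:

    // Declares an event-time watermark; its current value is tracked per operator.
    val withWm = events.withWatermark("eventTime", "10 minutes")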
- var watermarkTracker: WatermarkTracker
- Attributes
- protected
- def withLogContext(context: Map[String, String])(body: => Unit): Unit
- Attributes
- protected
- Definition Classes
- Logging
Deprecated Value Members
- def finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.Throwable]) @Deprecated
- Deprecated
(Since version 9)