Instance Constructors
- new FileStreamSinkLog(sparkSession: SparkSession, path: String)
Value Members
- final def !=(arg0: AnyRef): Boolean
- final def !=(arg0: Any): Boolean
- final def ##(): Int
- final def ==(arg0: AnyRef): Boolean
- final def ==(arg0: Any): Boolean
- def add(batchId: Long, logs: Seq[SinkFileStatus]): Boolean
- final def asInstanceOf[T0]: T0
- def batchIdToPath(batchId: Long): Path
- def clone(): AnyRef
- def deserialize(bytes: Array[Byte]): Seq[SinkFileStatus]
- final def eq(arg0: AnyRef): Boolean
- def equals(arg0: Any): Boolean
- def finalize(): Unit
- def get(startId: Option[Long], endId: Option[Long]): Array[(Long, Seq[SinkFileStatus])]
- def get(batchId: Long): Option[Seq[SinkFileStatus]]
- final def getClass(): Class[_]
- def getLatest(): Option[(Long, Seq[SinkFileStatus])]
- def hashCode(): Int
- def initializeLogIfNecessary(isInterpreter: Boolean): Unit
- def isBatchFile(path: Path): Boolean
- final def isInstanceOf[T0]: Boolean
- def isTraceEnabled(): Boolean
- def log: Logger
- def logDebug(msg: ⇒ String, throwable: Throwable): Unit
- def logDebug(msg: ⇒ String): Unit
- def logError(msg: ⇒ String, throwable: Throwable): Unit
- def logError(msg: ⇒ String): Unit
- def logInfo(msg: ⇒ String, throwable: Throwable): Unit
- def logInfo(msg: ⇒ String): Unit
- def logName: String
- def logTrace(msg: ⇒ String, throwable: Throwable): Unit
- def logTrace(msg: ⇒ String): Unit
- def logWarning(msg: ⇒ String, throwable: Throwable): Unit
- def logWarning(msg: ⇒ String): Unit
- val metadataPath: Path
- final def ne(arg0: AnyRef): Boolean
- final def notify(): Unit
- final def notifyAll(): Unit
- def pathToBatchId(path: Path): Long
- def serialize(logData: Seq[SinkFileStatus]): Array[Byte]
- final def synchronized[T0](arg0: ⇒ T0): T0
- def toString(): String
- final def wait(): Unit
- final def wait(arg0: Long, arg1: Int): Unit
- final def wait(arg0: Long): Unit
Inherited from Logging
Inherited from AnyRef
Inherited from Any
A special log for FileStreamSink. It writes one log file for each batch. The first line of a log file is the version number, followed by multiple JSON lines, each of which is the JSON serialization of a SinkFileStatus.
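A minimal sketch of that format using the serialize and deserialize members listed above; the import path, the example file paths, and the SinkFileStatus field names are assumptions and may differ across Spark versions.

// Sketch only: package and SinkFileStatus fields below are assumptions.
import java.nio.charset.StandardCharsets
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.streaming.{FileStreamSinkLog, SinkFileStatus}

val spark = SparkSession.builder().master("local[*]").appName("sink-log-format").getOrCreate()

// Constructor as shown in the Instance Constructors section above.
val sinkLog = new FileStreamSinkLog(spark, "/tmp/out/_spark_metadata")

// Hypothetical file entry; the field names are assumptions, not taken from this page.
val status = SinkFileStatus(
  path = "/tmp/out/part-00000",
  size = 1024L,
  isDir = false,
  modificationTime = System.currentTimeMillis(),
  blockReplication = 1,
  blockSize = 128L * 1024 * 1024,
  action = "add")

// serialize produces the on-disk batch file content:
// a version line followed by one JSON line per SinkFileStatus.
val bytes = sinkLog.serialize(Seq(status))
println(new String(bytes, StandardCharsets.UTF_8))

// deserialize is the inverse and recovers the entries.
assert(sinkLog.deserialize(bytes) == Seq(status))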
Because reading from many small files is usually quite slow, FileStreamSinkLog compacts the log files into a single large file every "spark.sql.sink.file.log.compactLen" batches. During a compaction it reads all old log files and merges them with the new batch, and it also drops the entries of files that have been deleted (as marked by SinkFileStatus.action). When the reader uses allFiles to list all files, only the visible files are returned (the deleted files are dropped).
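The compaction itself happens internally. A hedged sketch of the reader/writer surface listed on this page (allFiles is mentioned in the description but does not appear in the member list above, so the sketch uses get and getLatest instead), continuing from the previous example; the copy call assumes SinkFileStatus is a case class:

// Commit two hypothetical batches; add returns a Boolean per the signature above
// (assumed to be false when the batch has already been committed).
sinkLog.add(0L, Seq(status))
sinkLog.add(1L, Seq(status.copy(path = "/tmp/out/part-00001")))

// Read one batch, a range (None = unbounded), or the latest committed batch.
val batch0: Option[Seq[SinkFileStatus]] = sinkLog.get(0L)
val allBatches: Array[(Long, Seq[SinkFileStatus])] = sinkLog.get(None, None)
val latest: Option[(Long, Seq[SinkFileStatus])] = sinkLog.getLatest()

// Batch ids map to log file names under metadataPath.
val p = sinkLog.batchIdToPath(1L)
assert(sinkLog.isBatchFile(p) && sinkLog.pathToBatchId(p) == 1L)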