spark

package spark

Ordering

Alphabetic

Visibility

Public
All

Type Members

class Analyzer extends AnyRef
class Args extends ScallopConf
sealed trait BaseKvRdd extends AnyRef
case class BootstrapInfo(joinConf: api.Join, joinParts: Seq[JoinPartMetadata], externalParts: Seq[ExternalPartMetadata], derivations: Array[StructField], hashToSchema: Map[String, Array[StructField]]) extends Product with Serializable
class ChrononDeltaLakeKryoRegistrator extends ChrononKryoRegistrator
class ChrononKryoRegistrator extends KryoRegistrator
case class CoveringSet(hashes: Seq[String], rowCount: Long, isCovering: Boolean) extends Product with Serializable
class CpcSketchKryoSerializer extends Serializer[CpcSketch]
sealed trait DataRange extends AnyRef
case class DefaultFormatProvider(sparkSession: SparkSession) extends FormatProvider with Product with Serializable
Default format provider implementation, based on the open-source library versions that Chronon supports by default.
class DummyExtensions extends (SparkSessionExtensions) ⇒ Unit
case class ExternalPartMetadata(externalPart: ExternalPart, keySchema: Array[StructField], valueSchema: Array[StructField]) extends Product with Serializable
trait Format extends AnyRef
Trait that tracks the table format in use by a Chronon dataset and provides utility methods to help retrieve metadata and configure it appropriately at creation time.
trait FormatProvider extends Serializable
Dynamically provide the read / write table format depending on table name.
Dynamically provide the read / write table format depending on table name. This supports reading and writing tables with heterogeneous formats, and it lets users override the default by specifying a custom format provider if needed. This is useful, for example, when leveraging library versions that differ from those supported in the Chronon project (e.g. a newer Delta Lake), or when working with custom internal company logic or checks.
class GroupBy extends Serializable
class GroupByUpload extends Serializable
sealed case class IncompatibleSchemaException(inconsistencies: Seq[(String, DataType, DataType)]) extends Exception with Product with Serializable
class ItemSketchSerializable extends Serializable
class ItemsSketchKryoSerializer[T] extends Serializer[ItemsSketchIR[T]]
class Join extends JoinBase
abstract class JoinBase extends AnyRef
case class JoinPartMetadata(joinPart: JoinPart, keySchema: Array[StructField], valueSchema: Array[StructField], derivationDependencies: Map[StructField, Seq[StructField]]) extends Product with Serializable
case class KeyWithHash(data: Array[Any], hash: Array[Byte], hashInt: Int) extends Serializable with Product
case class KvRdd(data: RDD[(Array[Any], Array[Any])], keySchema: StructType, valueSchema: StructType)(implicit sparkSession: SparkSession) extends BaseKvRdd with Product with Serializable
class LabelJoin extends AnyRef
class LocalTableExporter extends AnyRef
class LogFlattenerJob extends Serializable
The purpose of LogFlattenerJob is to unpack serialized Avro data from online requests, flatten each field (both keys and values) into individual columns, and save the result to an offline "flattened" log table.
The purpose of LogFlattenerJob is to unpack serialized Avro data from online requests, flatten each field (both keys and values) into individual columns, and save the result to an offline "flattened" log table.
Steps:
1. Determine the unfilled range and pull raw logs from the partitioned log table.
2. Fetch joinCodecs for every unique schema_hash present in the logs.
3. Build a merged schema from all schema versions, which will be used as the output schema.
4. Unpack each row and make it adhere to the output schema.
5. Save the schema info in the flattened log table properties (cumulatively).
case class LoggingSchema(keyCodec: AvroCodec, valueCodec: AvroCodec) extends Product with Serializable
case class PartitionRange(start: String, end: String)(implicit tableUtils: TableUtils) extends DataRange with Ordered[PartitionRange] with Product with Serializable
case class SemanticHashException(message: String) extends Exception with Product with Serializable
case class SemanticHashHiveMetadata(semanticHash: Map[String, String], excludeTopic: Boolean) extends Product with Serializable
class StagingQuery extends AnyRef
case class TableUtils(sparkSession: SparkSession) extends Product with Serializable
case class TimeRange(start: Long, end: Long)(implicit tableUtils: TableUtils) extends DataRange with Product with Serializable
case class TimedKvRdd(data: RDD[(Array[Any], Array[Any], Long)], keySchema: StructType, valueSchema: StructType, storeSchemasPrefix: Option[String] = None)(implicit sparkSession: SparkSession) extends BaseKvRdd with Product with Serializable

Value Members

object BootstrapInfo extends Serializable
object Comparison
object CoveringSet extends Serializable
object DeltaLake extends Format with Product with Serializable
object Driver
object EncoderUtil
object Extensions
object FastHashing
object GenericRowHandler
object GroupBy extends Serializable
object GroupByUpload extends Serializable
object Hive extends Format with Product with Serializable
object Iceberg extends Format with Product with Serializable
object JoinUtils
object LocalDataLoader
object LocalTableExporter
object LogFlattenerJob extends Serializable
object LogUtils
object LoggingSchema extends Serializable
object MetadataExporter
object SemanticHashUtils
object SparkConstants
object SparkSessionBuilder
object StagingQuery

Packages

spark

package spark

Type Members

Value Members

Ungrouped

Packages

spark 

package spark

Type Members

Value Members

Ungrouped

spark