
package ai.chronon.spark


Type Members

  1. class Analyzer extends AnyRef
  2. class Args extends ScallopConf
  3. sealed trait BaseKvRdd extends AnyRef
  4. case class BootstrapInfo(joinConf: api.Join, joinParts: Seq[JoinPartMetadata], externalParts: Seq[ExternalPartMetadata], derivations: Array[StructField], hashToSchema: Map[String, Array[StructField]]) extends Product with Serializable
  5. class ChrononDeltaLakeKryoRegistrator extends ChrononKryoRegistrator
  6. class ChrononKryoRegistrator extends KryoRegistrator
  7. case class CoveringSet(hashes: Seq[String], rowCount: Long, isCovering: Boolean) extends Product with Serializable
  8. class CpcSketchKryoSerializer extends Serializer[CpcSketch]
  9. sealed trait DataRange extends AnyRef
  10. case class DefaultFormatProvider(sparkSession: SparkSession) extends FormatProvider with Product with Serializable

    Default format provider implementation, based on the open source library versions that Chronon supports out of the box.

  11. class DummyExtensions extends (SparkSessionExtensions) ⇒ Unit
  12. case class ExternalPartMetadata(externalPart: ExternalPart, keySchema: Array[StructField], valueSchema: Array[StructField]) extends Product with Serializable
  13. trait Format extends AnyRef

    Trait that tracks the table format used by a Chronon dataset, along with utility methods to retrieve its metadata and configure it appropriately at table creation time.

  14. trait FormatProvider extends Serializable

    Dynamically provides the read/write table format depending on table name. This supports reading and writing tables with heterogeneous formats, and lets users override it with a custom format provider if needed, e.g. to leverage library versions beyond what the Chronon project ships with (such as a newer Delta Lake) or to apply custom internal company logic / checks. A hypothetical implementation is sketched after this list.

  15. class GroupBy extends Serializable
  16. class GroupByUpload extends Serializable
  17. sealed case class IncompatibleSchemaException(inconsistencies: Seq[(String, DataType, DataType)]) extends Exception with Product with Serializable
  18. class ItemSketchSerializable extends Serializable
  19. class ItemsSketchKryoSerializer[T] extends Serializer[ItemsSketchIR[T]]
  20. class Join extends JoinBase
  21. abstract class JoinBase extends AnyRef
  22. case class JoinPartMetadata(joinPart: JoinPart, keySchema: Array[StructField], valueSchema: Array[StructField], derivationDependencies: Map[StructField, Seq[StructField]]) extends Product with Serializable
  23. case class KeyWithHash(data: Array[Any], hash: Array[Byte], hashInt: Int) extends Serializable with Product
  24. case class KvRdd(data: RDD[(Array[Any], Array[Any])], keySchema: StructType, valueSchema: StructType)(implicit sparkSession: SparkSession) extends BaseKvRdd with Product with Serializable
  25. class LabelJoin extends AnyRef
  26. class LocalTableExporter extends AnyRef
  27. class LogFlattenerJob extends Serializable

    The purpose of LogFlattenerJob is to unpack serialized Avro data from online requests, flatten each field (both keys and values) into individual columns, and save the result to an offline "flattened" log table. A rough sketch of the core unpacking step appears after this list.

    Steps:
      1. determine the unfilled range and pull raw logs from the partitioned log table
      2. fetch joinCodecs for all unique schema_hash values present in the logs
      3. build a merged schema from all schema versions, to be used as the output schema
      4. unpack each row and conform it to the output schema
      5. save the schema info in the flattened log table properties (cumulatively)

  28. case class LoggingSchema(keyCodec: AvroCodec, valueCodec: AvroCodec) extends Product with Serializable
  29. case class PartitionRange(start: String, end: String)(implicit tableUtils: TableUtils) extends DataRange with Ordered[PartitionRange] with Product with Serializable
  30. case class SemanticHashException(message: String) extends Exception with Product with Serializable
  31. case class SemanticHashHiveMetadata(semanticHash: Map[String, String], excludeTopic: Boolean) extends Product with Serializable
  32. class StagingQuery extends AnyRef
  33. case class TableUtils(sparkSession: SparkSession) extends Product with Serializable
  34. case class TimeRange(start: Long, end: Long)(implicit tableUtils: TableUtils) extends DataRange with Product with Serializable
  35. case class TimedKvRdd(data: RDD[(Array[Any], Array[Any], Long)], keySchema: StructType, valueSchema: StructType, storeSchemasPrefix: Option[String] = None)(implicit sparkSession: SparkSession) extends BaseKvRdd with Product with Serializable
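
A minimal sketch of a custom FormatProvider (item 14 above), routing tables by name prefix to the Format objects listed under Value Members. The readFormat / writeFormat method names are assumptions for illustration; consult the trait's actual members for the real signatures.

    import ai.chronon.spark.{DeltaLake, Format, FormatProvider, Hive, Iceberg}

    // Hypothetical provider: route "lake_" tables to Delta Lake,
    // "ice_" tables to Iceberg, and everything else to Hive.
    case class CompanyFormatProvider() extends FormatProvider {
      private def formatFor(tableName: String): Format =
        if (tableName.startsWith("lake_")) DeltaLake
        else if (tableName.startsWith("ice_")) Iceberg
        else Hive

      // Assumed interface: look up the format by table name for
      // reads and writes independently.
      def readFormat(tableName: String): Format = formatFor(tableName)
      def writeFormat(tableName: String): Format = formatFor(tableName)
    }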
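
A rough sketch of the unpack-and-flatten step from LogFlattenerJob (item 27 above), written with plain Spark APIs rather than the job's internals. For simplicity it decodes a single schema version, whereas the real job merges all schema versions into one output schema; the table and column names (raw_log_table, key_bytes, value_bytes, schema_hash, ds) and the inline Avro schemas are assumptions. Requires the spark-avro module.

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.avro.functions.from_avro
    import org.apache.spark.sql.functions.col

    val spark = SparkSession.builder().appName("flatten-sketch").getOrCreate()

    // Avro schemas as they might be fetched for one schema_hash (step 2).
    val keySchemaJson =
      """{"type":"record","name":"Key","fields":[{"name":"user_id","type":"long"}]}"""
    val valueSchemaJson =
      """{"type":"record","name":"Value","fields":[{"name":"score","type":"double"}]}"""

    // Step 1: pull raw logs for an unfilled partition of the log table.
    val rawLogs = spark.table("namespace.raw_log_table")
      .where(col("ds") === "2023-01-01" && col("schema_hash") === "abc123")

    // Step 4: decode the serialized key/value bytes and flatten every
    // field (keys and values) into its own column.
    val flattened = rawLogs
      .withColumn("key", from_avro(col("key_bytes"), keySchemaJson))
      .withColumn("value", from_avro(col("value_bytes"), valueSchemaJson))
      .select(col("key.*"), col("value.*"), col("ds"))

    // Step 5 in the real job also records schema info in the output
    // table's properties; here we only write the flattened rows.
    flattened.write.mode("overwrite").saveAsTable("namespace.flattened_log_table")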

Value Members

  1. object BootstrapInfo extends Serializable
  2. object Comparison
  3. object CoveringSet extends Serializable
  4. object DeltaLake extends Format with Product with Serializable
  5. object Driver
  6. object EncoderUtil
  7. object Extensions
  8. object FastHashing
  9. object GenericRowHandler
  10. object GroupBy extends Serializable
  11. object GroupByUpload extends Serializable
  12. object Hive extends Format with Product with Serializable
  13. object Iceberg extends Format with Product with Serializable
  14. object JoinUtils
  15. object LocalDataLoader
  16. object LocalTableExporter
  17. object LogFlattenerJob extends Serializable
  18. object LogUtils
  19. object LoggingSchema extends Serializable
  20. object MetadataExporter
  21. object SemanticHashUtils
  22. object SparkConstants
  23. object SparkSessionBuilder
  24. object StagingQuery
