package spark
- Alphabetic
- Public
- All
Type Members
- class Analyzer extends AnyRef
- class Args extends ScallopConf
- sealed trait BaseKvRdd extends AnyRef
- case class BootstrapInfo(joinConf: api.Join, joinParts: Seq[JoinPartMetadata], externalParts: Seq[ExternalPartMetadata], derivations: Array[StructField], hashToSchema: Map[String, Array[StructField]]) extends Product with Serializable
- class ChrononKryoRegistrator extends KryoRegistrator
- class CpcSketchKryoSerializer extends Serializer[CpcSketch]
- sealed trait DataRange extends AnyRef
- class DummyExtensions extends (SparkSessionExtensions) ⇒ Unit
- case class ExternalPartMetadata(externalPart: ExternalPart, keySchema: Array[StructField], valueSchema: Array[StructField]) extends Product with Serializable
- class GroupBy extends Serializable
- class GroupByUpload extends Serializable
- sealed case class IncompatibleSchemaException(inconsistencies: Seq[(String, DataType, DataType)]) extends Exception with Product with Serializable
- class ItemSketchSerializable extends Serializable
- class ItemsSketchKryoSerializer extends Serializer[ItemSketchSerializable]
- class Join extends JoinBase
- abstract class JoinBase extends AnyRef
- case class JoinPartMetadata(joinPart: JoinPart, keySchema: Array[StructField], valueSchema: Array[StructField], derivationDependencies: Map[StructField, Seq[StructField]]) extends Product with Serializable
- case class KeyWithHash(data: Array[Any], hash: Array[Byte], hashInt: Int) extends Serializable with Product
- case class KvRdd(data: RDD[(Array[Any], Array[Any])], keySchema: StructType, valueSchema: StructType)(implicit sparkSession: SparkSession) extends BaseKvRdd with Product with Serializable
- class LabelJoin extends AnyRef
- class LocalTableExporter extends AnyRef
- class LogFlattenerJob extends Serializable
The purpose of LogFlattenerJob is to unpack serialized Avro data from online requests, flatten each field (both keys and values) into individual columns, and save the result to an offline "flattened" log table.
Steps:
1. Determine the unfilled range and pull raw logs from the partitioned log table.
2. Fetch joinCodecs for every unique schema_hash present in the logs.
3. Build a merged schema from all schema versions, which will be used as the output schema.
4. Unpack each row and conform it to the output schema.
5. Save the schema info in the flattened log table's properties (cumulatively).
- case class LoggingSchema(keyCodec: AvroCodec, valueCodec: AvroCodec) extends Product with Serializable
- case class PartitionRange(start: String, end: String)(implicit tableUtils: TableUtils) extends DataRange with Ordered[PartitionRange] with Product with Serializable
- class StagingQuery extends AnyRef
- case class TableUtils(sparkSession: SparkSession) extends Product with Serializable
- case class TimeRange(start: Long, end: Long)(implicit tableUtils: TableUtils) extends DataRange with Product with Serializable
- case class TimedKvRdd(data: RDD[(Array[Any], Array[Any], Long)], keySchema: StructType, valueSchema: StructType)(implicit sparkSession: SparkSession) extends BaseKvRdd with Product with Serializable
Value Members
- object BootstrapInfo extends Serializable
- object Comparison
- object Driver
- object Extensions
- object FastHashing
- object GenericRowHandler
- object GroupBy extends Serializable
- object GroupByUpload extends Serializable
- object JoinUtils
- object LocalDataLoader
- object LocalTableExporter
- object LogFlattenerJob extends Serializable
- object LogUtils
- object LoggingSchema extends Serializable
- object MetadataExporter
- object SparkConstants
- object SparkSessionBuilder
- object StagingQuery