: Airflow REST API endpoint, e.g. http://127.0.0.1:8080/api/experimental
Datasets in the data pipeline go through several stages and are stored on disk at each of these stages. The settings below allow customizing the folder name of each stage, as illustrated in the sketch further down.
: Name of the pending area
: Name of the unresolved area
: Name of the archive area
: Name of the ingesting area
: Name of the accepted area
: Name of the rejected area
: Name of the business area
: Absolute path, datasets root folder beneath which each area is defined.
: Absolute path, location where all types / domains and auto jobs are defined
: Absolute path, location where all computed metrics are stored
: Absolute path, location where all logs are stored
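To make the layout concrete, here is a minimal sketch of how the area and path settings above could be grouped in a HOCON application.conf. The key names, folder names and paths shown are illustrative assumptions, not the authoritative setting names.

```hocon
# Illustrative sketch only: key names and values are assumptions,
# not the actual setting names shipped with the project.
area {
  pending    = "pending"      # folder name of the pending area
  unresolved = "unresolved"   # folder name of the unresolved area
  archive    = "archive"      # folder name of the archive area
  ingesting  = "ingesting"    # folder name of the ingesting area
  accepted   = "accepted"     # folder name of the accepted area
  rejected   = "rejected"     # folder name of the rejected area
  business   = "business"     # folder name of the business area
}
datasets = "/data/comet/datasets"   # root folder beneath which each area is created
metadata = "/data/comet/metadata"   # types / domains / auto jobs definitions
metrics  = "/data/comet/metrics"    # computed metrics
audit    = "/data/comet/audit"      # logs
```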
: Should we back up the ingested datasets? true by default
: Choose between parquet, orc ... Default is parquet
: Writing format for rejected datasets, choose between parquet, orc ... Default is parquet
: Writing format for audit datasets, choose between parquet, orc ... Default is parquet
: Cron job manager: simple (useful for testing) or airflow? simple by default
: Should we create basic Hive statistics on the generated dataset? true by default
: Should we create a Hive table? true by default
: see Area above
: Airflow endpoint. Must be defined even if the simple launcher is used instead of Airflow.
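A similarly hedged sketch of the ingestion-behaviour settings described above; once more, the key names and values are assumptions chosen for readability rather than the exact configuration keys.

```hocon
# Illustrative sketch only: key names are assumptions.
archive  = true        # back up the ingested datasets (true by default)
launcher = "simple"    # cron job manager: "simple" (testing) or "airflow"
analyze  = true        # compute basic Hive statistics on the generated dataset
hive     = true        # create a Hive table

writeFormat         = "parquet"   # parquet, orc, ...
rejectedWriteFormat = "parquet"   # writing format for rejected datasets
auditWriteFormat    = "parquet"   # writing format for audit datasets

airflow {
  # required even when the simple launcher is used
  endpoint = "http://127.0.0.1:8080/api/experimental"
}
```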
Describes a connection to a JDBC-accessible database engine
source / sink format (jdbc by default). Cf. spark.format for possible values
Spark SaveMode to use. If not present, the save mode will be computed from the write disposition set in the YAML file
any option required by the format used to ingest / transform / compute the data. E.g. for JDBC, uri, user and password are required:
uri : the URI of the database engine. It must start with "jdbc:"
user : the username under which to connect to the database engine
password : the password to use in order to connect to the database engine
the index into the Comet.jdbcEngines map of the underlying database engine, in case one cannot use the engine name from the uri
the use case for engineOverride is when you need an alternate schema definition (e.g. non-standard table names) alongside the regular schema definition, on the same underlying engine.
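The description above maps naturally to a named connection entry in the configuration. The sketch below assumes connections are declared as a map keyed by connection name; the nesting, connection name and key names are illustrative.

```hocon
# Illustrative sketch only: the "connections" map layout and key names are assumed.
connections {
  "my-postgres" {                     # hypothetical connection name
    format = "jdbc"                   # source / sink format, jdbc by default
    mode   = "Append"                 # optional Spark SaveMode; otherwise derived from the YAML write disposition
    options {
      uri      = "jdbc:postgresql://127.0.0.1:5432/mydb"  # must start with "jdbc:"
      user     = "dbuser"
      password = "dbpassword"
    }
    engineOverride = "postgresql"     # optional index into the jdbcEngines map
  }
}
```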
Describes how to use a specific type of JDBC-accessible database engine
for each of the Standard Table Names used by Comet, the specific SQL DDL statements as expected in the engine's own dialect.
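As a sketch of what such an engine description could look like: one DDL statement per standard table name, written in the engine's dialect. The engine key, table name, column layout and DDL key below are assumptions for illustration only.

```hocon
# Illustrative sketch only: engine key, table names and DDL are assumed.
jdbcEngines {
  postgresql {
    tables {
      audit {
        createSql = """
          CREATE TABLE IF NOT EXISTS audit (
            jobid     VARCHAR(255) NOT NULL,
            success   BOOLEAN      NOT NULL,
            timestamp TIMESTAMP    NOT NULL
          )
        """
      }
    }
  }
}
```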
: Max number of unique values allowed in the cardinality computation
: Map of privacy algorithm names -> PrivacyEngine
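Finally, a hedged sketch of how the cardinality cap and the privacy algorithm map could be expressed; the key names and the engine class names are placeholders, not the real ones.

```hocon
# Illustrative sketch only: key names and class names are placeholders.
metrics {
  discreteMaxCardinality = 10   # max number of unique values allowed in the cardinality computation
}
privacy {
  options {
    "md5"  = "com.example.privacy.Md5"   # algorithm name -> PrivacyEngine implementation
    "none" = "com.example.privacy.No"
  }
}
```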