Package com.ebiznext.comet.job.ingest

package ingest

Type Members

  1. case class AuditLog(jobid: String, paths: String, domain: String, schema: String, success: Boolean, count: Long, countAccepted: Long, countRejected: Long, timestamp: Timestamp, duration: Long, message: String, step: String) extends Product with Serializable

  2. case class ContinuousMetricRecord(domain: String, schema: String, attribute: String, min: Option[Double], max: Option[Double], mean: Option[Double], missingValues: Option[Long], standardDev: Option[Double], variance: Option[Double], sum: Option[Double], skewness: Option[Double], kurtosis: Option[Double], percentile25: Option[Double], median: Option[Double], percentile75: Option[Double], count: Long, cometTime: Long, cometStage: String, cometMetric: String, jobId: String) extends Product with Serializable

  3. case class DiscreteMetricRecord(domain: String, schema: String, attribute: String, missingValuesDiscrete: Long, countDistinct: Long, count: Long, cometTime: Long, cometStage: String, cometMetric: String, jobId: String) extends Product with Serializable

  4. class DsvIngestionJob extends IngestionJob

    Main class to ingest delimiter-separated values (DSV) files

  5. case class FrequencyMetricRecord(domain: String, schema: String, attribute: String, category: String, frequency: Long, count: Long, cometTime: Long, cometStage: String, jobId: String) extends Product with Serializable

  6. trait IngestionJob extends SparkJob

  7. class JsonIngestionJob extends IngestionJob

    Main class to ingest complex JSON files. If your JSON contains only one-level simple attributes (i.e. a kind of DSV in JSON format), use SIMPLE_JSON instead: it is much faster.
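
For illustration (field names are hypothetical), the first record below is flat, one-level JSON that SIMPLE_JSON can handle, while the second contains an array and therefore needs JsonIngestionJob:

```json
{"id": 1, "name": "alice", "age": 32}
{"id": 2, "name": "bob", "contacts": [{"type": "email", "value": "bob@example.com"}]}
```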

  8. class KafkaIngestionJob extends JsonIngestionJob

    Main class to ingest JSON messages from Kafka

  9. case class LoadConfig(domain: String = "", schema: String = "", paths: List[Path] = Nil, options: Map[String, String] = Map.empty) extends Product with Serializable

    domain: the domain name of the dataset
    schema: the schema name of the dataset
    paths: absolute paths of the files to ingest (present in the ingesting area of the domain)
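
As a minimal sketch of how such a value could be built (a standalone mirror of the case-class signature above: the real class takes org.apache.hadoop.fs.Path values in `paths`, plain Strings are used here so the example runs without Hadoop on the classpath; the domain, schema, and path values are hypothetical):

```scala
// Standalone mirror of LoadConfig with the same fields and defaults;
// `paths` is List[String] here instead of List[org.apache.hadoop.fs.Path].
case class LoadConfig(
  domain: String = "",
  schema: String = "",
  paths: List[String] = Nil,
  options: Map[String, String] = Map.empty
)

object LoadConfigExample extends App {
  // Hypothetical domain/schema names and ingestion path.
  val cfg = LoadConfig(
    domain = "sales",
    schema = "orders",
    paths = List("/ingesting/sales/orders.dsv"),
    options = Map("delimiter" -> ";")
  )
  println(s"${cfg.domain}/${cfg.schema} <- ${cfg.paths.mkString(",")}")
}
```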

  10. class PositionIngestionJob extends DsvIngestionJob

    Main class to ingest fixed-position files

  11. case class RejectedRecord(jobid: String, timestamp: Timestamp, domain: String, schema: String, error: String, path: String) extends Product with Serializable

  12. class SimpleJsonIngestionJob extends DsvIngestionJob

    Parse a simple one-level JSON file. Complex types such as arrays and maps are not supported; use JsonIngestionJob for those. Restricting input to simple JSON is what makes this job much faster.

  13. sealed case class Step(value: String) extends Product with Serializable

  14. class XmlIngestionJob extends IngestionJob

    Main class to ingest XML files.

  15. class XmlSimplePrivacyJob extends IngestionJob

    Used only to apply data masking rules (privacy) on one or more simple elements in XML data. The input XML file is read as a text file; privacy rules are applied on the resulting DataFrame and the result is saved to the accepted area. In the definition of the XML schema:

    - schema.metadata.format should be set to TEXT_XML
    - schema.attributes should only contain the attributes on which privacy should be applied

    Additionally, Comet.defaultWriteFormat should be set to text in order to produce an XML-formatted output file, and Comet.privacyOnly should be set to true to save the result in one file (coalesce 1).
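
A hypothetical schema fragment sketching these settings (the exact YAML layout of a Comet schema definition may differ; only the keys named in the description above are meaningful, and the element name and masking rule are made up for illustration):

```yaml
# Hypothetical sketch of an XML privacy schema.
metadata:
  format: "TEXT_XML"    # read the XML input as a text file
attributes:             # only the elements on which privacy applies
  - name: "ssn"         # hypothetical XML element name
    privacy: "HIDE"     # hypothetical masking rule

# Per the description, in the application settings:
#   defaultWriteFormat = text   # XML-formatted output file
#   privacyOnly = true          # save the result in one file (coalesce 1)
```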

Value Members

  1. object ImprovedDataFrameContext

  2. object IngestionUtil

  3. object LoadConfig extends CliConfig[LoadConfig] with Serializable

  4. object PositionIngestionUtil

    The Spark task that runs on each worker

  5. object SparkAuditLogWriter

  6. object Step extends Serializable

  7. object XmlSimplePrivacyJob
