Metadata

Specify Schema properties. These properties may be specified at the schema or domain level. Any property not specified at the schema level is taken from the one specified at the domain level, or else the default value is returned.

mode

: FILE mode by default. FILE and STREAM are the two accepted values. FILE is currently the only supported mode.

format

: DSV by default. Supported file formats are :

DSV : Delimiter-separated values file. Delimiter value is specified in the "separator" field.
POSITION : FIXED format file where values are located at an exact position in each line.
SIMPLE_JSON : For optimisation purpose, we differentiate JSON with top level values from JSON with deep level fields. SIMPLE_JSON are JSON files with top level fields only.
JSON : Deep JSON file. Use only when your json documents contain subdocuments, otherwise prefer to use SIMPLE_JSON since it is much faster.
XML : XML files

encoding

: UTF-8 if not specified.

multiline

: Are JSON objects on a single line or on multiple lines? Single by default. false means single. false also means faster.

array

: Is the JSON stored as a single object array? false by default. This means that by default we have one JSON document per line.

withHeader

: Does the dataset have a header? true by default.

separator

: The values delimiter, ';' by default. The value may be a multichar string starting from Spark 3.

quote

: The String quote char, '"' by default

escape

: escaping char '\' by default

write

: Write mode, APPEND by default

partition

: Partition columns, no partitioning by default

sink

: should the dataset be indexed in elasticsearch after ingestion ?

ignore

: Pattern to ignore or UDF to apply to ignore some lines

clustering

: List of attributes to use for clustering

xml

: com.databricks.spark.xml options to use (e.g. rowTag)

Linear Supertypes

Serializable, Serializable, Product, Equals, AnyRef, Any

Instance Constructors

new Metadata(mode: Option[Mode] = None, format: Option[Format] = None, encoding: Option[String] = None, multiline: Option[Boolean] = None, array: Option[Boolean] = None, withHeader: Option[Boolean] = None, separator: Option[String] = None, quote: Option[String] = None, escape: Option[String] = None, write: Option[WriteMode] = None, partition: Option[Partition] = None, sink: Option[Sink] = None, ignore: Option[String] = None, clustering: Option[Seq[String]] = None, xml: Option[Map[String, String]] = None)
mode
: FILE mode by default. FILE and STREAM are the two accepted values. FILE is currently the only supported mode.
format
: DSV by default. Supported file formats are :
- DSV : Delimiter-separated values file. Delimiter value is specified in the "separator" field.
- POSITION : FIXED format file where values are located at an exact position in each line.
- SIMPLE_JSON : For optimisation purpose, we differentiate JSON with top level values from JSON with deep level fields. SIMPLE_JSON are JSON files with top level fields only.
- JSON : Deep JSON file. Use only when your json documents contain subdocuments, otherwise prefer to use SIMPLE_JSON since it is much faster.
- XML : XML files
encoding
: UTF-8 if not specified.
multiline
: Are JSON objects on a single line or on multiple lines? Single by default. false means single. false also means faster.
array
: Is the JSON stored as a single object array? false by default. This means that by default we have one JSON document per line.
withHeader
: Does the dataset have a header? true by default.
separator
: The values delimiter, ';' by default. The value may be a multichar string starting from Spark 3.
quote
: The String quote char, '"' by default
escape
: escaping char '\' by default
write
: Write mode, APPEND by default
partition
: Partition columns, no partitioning by default
sink
: should the dataset be indexed in elasticsearch after ingestion ?
ignore
: Pattern to ignore or UDF to apply to ignore some lines
clustering
: List of attributes to use for clustering
xml
: com.databricks.spark.xml options to use (e.g. rowTag)

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
val array: Option[Boolean]

: Is the JSON stored as a single object array? false by default.
: Is the JSON stored as a single object array? false by default. This means that by default we have one JSON document per line.
final def asInstanceOf[T0]: T0

Definition Classes
Any
def checkValidity(schemaHandler: SchemaHandler): Either[List[String], Boolean]
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
val clustering: Option[Seq[String]]

: List of attributes to use for clustering
val encoding: Option[String]

: UTF-8 if not specified.
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
val escape: Option[String]

: escaping char '\' by default
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
val format: Option[Format]

: DSV by default.
: DSV by default. Supported file formats are :
- DSV : Delimiter-separated values file. Delimiter value is specified in the "separator" field.
- POSITION : FIXED format file where values are located at an exact position in each line.
- SIMPLE_JSON : For optimisation purpose, we differentiate JSON with top level values from JSON with deep level fields. SIMPLE_JSON are JSON files with top level fields only.
- JSON : Deep JSON file. Use only when your json documents contain subdocuments, otherwise prefer to use SIMPLE_JSON since it is much faster.
- XML : XML files
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def getEncoding(): String
def getEscape(): String
def getFormat(): Format
def getMode(): Mode
def getMultiline(): Boolean
def getPartitionAttributes(): List[String]

Annotations
@JsonIgnore()
def getQuote(): String
def getSamplingStrategy(): Double

Annotations
@JsonIgnore()
def getSeparator(): String
def getSink(): Option[Sink]
def getWrite(): WriteMode
val ignore: Option[String]

: Pattern to ignore or UDF to apply to ignore some lines
def import(child: Metadata): Metadata

Merge this metadata with its child.
Merge this metadata with its child. Any property defined at the child level overrides the one defined at this level. This allows a schema to override the domain metadata attributes. Applied to a domain-level metadata.
child
: Schema level metadata
returns
the metadata resulting of the merge of the schema and the domain metadata.
def isArray(): Boolean
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
def isWithHeader(): Boolean
def merge[T](parent: Option[T], child: Option[T]): Option[T]

Merge a single attribute
Merge a single attribute
parent
: Domain level metadata attribute
child
: Schema level metadata attribute
returns
the schema (child) attribute if it is defined, the domain (parent) attribute otherwise.

Attributes
protected
val mode: Option[Mode]

: FILE mode by default.
: FILE mode by default. FILE and STREAM are the two accepted values. FILE is currently the only supported mode.
val multiline: Option[Boolean]

: Are JSON objects on a single line or on multiple lines? Single by default.
: Are JSON objects on a single line or on multiple lines? Single by default. false means single. false also means faster.
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
val partition: Option[Partition]

: Partition columns, no partitioning by default
val quote: Option[String]

: The String quote char, '"' by default
val separator: Option[String]

: The values delimiter, ';' by default. The value may be a multichar string starting from Spark 3.
val sink: Option[Sink]

: should the dataset be indexed in elasticsearch after ingestion ?
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
Metadata → AnyRef → Any
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
val withHeader: Option[Boolean]

: Does the dataset have a header? true by default.
val write: Option[WriteMode]

: Write mode, APPEND by default
val xml: Option[Map[String, String]]

: com.databricks.spark.xml options to use (e.g.
: com.databricks.spark.xml options to use (e.g. rowTag)

Related Docs: object Metadata | package model

Instance Constructors

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: Any): Boolean

val array: Option[Boolean]

final def asInstanceOf[T0]: T0

def checkValidity(schemaHandler: SchemaHandler): Either[List[String], Boolean]

def clone(): AnyRef

val clustering: Option[Seq[String]]

val encoding: Option[String]

final def eq(arg0: AnyRef): Boolean

val escape: Option[String]

def finalize(): Unit

val format: Option[Format]

final def getClass(): Class[_]

def getEncoding(): String

def getEscape(): String

def getFormat(): Format

def getMode(): Mode

def getMultiline(): Boolean

def getPartitionAttributes(): List[String]

def getQuote(): String

def getSamplingStrategy(): Double

def getSeparator(): String

def getSink(): Option[Sink]

def getWrite(): WriteMode

val ignore: Option[String]

def import(child: Metadata): Metadata

def isArray(): Boolean

final def isInstanceOf[T0]: Boolean

def isWithHeader(): Boolean

def merge[T](parent: Option[T], child: Option[T]): Option[T]

val mode: Option[Mode]

val multiline: Option[Boolean]

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

val partition: Option[Partition]

val quote: Option[String]

val separator: Option[String]

val sink: Option[Sink]

final def synchronized[T0](arg0: ⇒ T0): T0

def toString(): String

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

val withHeader: Option[Boolean]

val write: Option[WriteMode]

val xml: Option[Map[String, String]]

Inherited from Serializable

Inherited from Serializable

Inherited from Product

Inherited from Equals

Inherited from AnyRef

Inherited from Any

Ungrouped