InferSchemaJob

Instance Constructors

new InferSchemaJob()(implicit settings: Settings)

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
def createDataFrameWithFormat(lines: List[String], dataPath: String, header: Boolean): DataFrame

Create the dataframe with its associated format
Create the dataframe with its associated format
lines
: list of lines read from file
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def getDomainDirectoryName(path: Path): String

Get domain directory name
Get domain directory name
path
: file path
returns
the domain directory name
def getFormatFile(lines: List[String]): String

Get format file by using the first and the last line of the dataset. We use mapPartitionsWithIndex to retrieve this information to make sure that the first line really corresponds to the first line (same for the last).
Get format file by using the first and the last line of the dataset. We use mapPartitionsWithIndex to retrieve this information to make sure that the first line really corresponds to the first line (same for the last).
lines
: list of lines read from file
def getSchemaPattern(path: Path): String

Get schema pattern
Get schema pattern
path
: file path
returns
the schema pattern
def getSeparator(lines: List[String]): String

Get the file separator by taking the character that appears most often in 10 lines of the dataset
Get the file separator by taking the character that appears most often in 10 lines of the dataset
lines
: list of lines read from file
returns
the file separator
def hashCode(): Int

Definition Classes
AnyRef → Any
def infer(domainName: String, schemaName: String, dataPath: String, savePath: String, header: Boolean): Try[Unit]

Just to force any Spark job to implement its entry point within the "run" method
Just to force any Spark job to implement its entry point within the "run" method
returns
: Spark Session used for the job
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
def name: String
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def readFile(path: Path): Dataset[String]

Read file without specifying the format
Read file without specifying the format
path
: file path
returns
a dataset of strings that contains the data of the file
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
AnyRef → Any
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

Related Doc: package infer

class InferSchemaJob extends AnyRef

Instance Constructors

new InferSchemaJob()(implicit settings: Settings)

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: Any): Boolean

final def asInstanceOf[T0]: T0

def clone(): AnyRef

def createDataFrameWithFormat(lines: List[String], dataPath: String, header: Boolean): DataFrame

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

final def getClass(): Class[_]

def getDomainDirectoryName(path: Path): String

def getFormatFile(lines: List[String]): String

def getSchemaPattern(path: Path): String

def getSeparator(lines: List[String]): String

def hashCode(): Int

def infer(domainName: String, schemaName: String, dataPath: String, savePath: String, header: Boolean): Try[Unit]

final def isInstanceOf[T0]: Boolean

def name: String

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

def readFile(path: Path): Dataset[String]

final def synchronized[T0](arg0: ⇒ T0): T0

def toString(): String

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from AnyRef

Inherited from Any

Ungrouped