SchemaConverter

object SchemaConverter extends Logging

SchemaConverter renames the columns of a Dataset/DataFrame according to the annotations declared on the fields of the given case class T.

import io.github.setl.annotations.ColumnName
case class MyObject(@ColumnName("col1") column1: String, column2: String)

val ds: Dataset[MyObject] = Seq(MyObject("a", "A"), MyObject("b", "B")).toDS()
// +-------+-------+
// |column1|column2|
// +-------+-------+
// |      a|      A|
// |      b|      B|
// +-------+-------+

val df = SchemaConverter.toDF(ds)
// +----+-------+
// |col1|column2|
// +----+-------+
// |   a|      A|
// |   b|      B|
// +----+-------+

val ds2 = SchemaConverter.fromDF[MyObject](df)
// +-------+-------+
// |column1|column2|
// +-------+-------+
// |      a|      A|
// |      b|      B|
// +-------+-------+

Linear Supertypes

Logging, AnyRef, Any

Ordering

Alphabetic
By Inheritance

Inherited

SchemaConverter
Logging
AnyRef
Any

Hide All
Show All

Visibility

Public
All

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[lang]
Definition Classes
AnyRef
Annotations
@throws( ... ) @native()
def compressColumn(structType: StructType)(dataFrame: DataFrame): DataFrame
For each column annotated with @Compress(compressor), compress the column with the given compressor
structType
structType containing the meta-information of the source DataFrame
dataFrame
DataFrame to be compressed
returns
a new DataFrame with compressed column(s)
def decompressColumn(structType: StructType)(dataFrame: DataFrame): DataFrame
Decompress a DataFrame having compressed column(s)
structType
structType containing the meta-information of the target DataFrame
dataFrame
DataFrame to be decompressed
returns
a DataFrame with column(s) decompressed
def dropCompoundKeyColumns(structType: StructType)(dataFrame: DataFrame): DataFrame
Drop all compound key columns
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
def fromDF[T](dataFrame: DataFrame)(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Dataset[T]
Convert a DataFrame to a Dataset[T] according to the annotations of T
T
type of dataset
dataFrame
input df

Annotations
@throws( ... )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
Annotations
@native()
def hashCode(): Int

Definition Classes
AnyRef → Any
Annotations
@native()
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
def log: Logger

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logError(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logInfo(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logName: String

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logWarning(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
Annotations
@native()
final def notifyAll(): Unit

Definition Classes
AnyRef
Annotations
@native()

def replaceDFColNameByFieldName(structType: StructType)(dataFrame: DataFrame): DataFrame

import io.github.setl.annotations.ColumnName

case class MyObject(@ColumnName("col1") column1: String, column2: String)

convert
+----+-------+
|col1|column2|
+----+-------+
|   a|      A|
|   b|      B|
+----+-------+

to
+-------+-------+
|column1|column2|
+-------+-------+
|      a|      A|
|      b|      B|
+-------+-------+

structType: StructType containing metadata of column name
dataFrame: the raw DataFrame loaded from a data persistence store
returns: a new DataFrame with renamed columns

def replaceFieldNameByColumnName(structType: StructType)(dataFrame: DataFrame): DataFrame

import io.github.setl.annotations.ColumnName

case class MyObject(@ColumnName("col1") column1: String, column2: String)

convert
+-------+-------+
|column1|column2|
+-------+-------+
|      a|      A|
|      b|      B|
+-------+-------+

to
+----+-------+
|col1|column2|
+----+-------+
|   a|      A|
|   b|      B|
+----+-------+

structType: StructType containing metadata of column name
dataFrame: the DataFrame to be saved into a data persistence store
returns: a new DataFrame with renamed columns

final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toDF[T](dataset: Dataset[T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): DataFrame
Convert a Dataset[T] to a DataFrame according to the annotations of T
T
type of dataset
dataset
input dataset
def toString(): String

Definition Classes
AnyRef → Any
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... ) @native()

Packages

SchemaConverter

object SchemaConverter extends Logging

Value Members

Inherited from Logging

Inherited from AnyRef

Inherited from Any

Ungrouped

Packages

SchemaConverter 

object SchemaConverter extends Logging

Value Members

Inherited from Logging

Inherited from AnyRef

Inherited from Any

Ungrouped

SchemaConverter