Object

io.github.setl.internal

SchemaConverter

Related Doc: package internal

Permalink

object SchemaConverter extends Logging

SchemaConverter renames the columns of a Dataset/DataFrame according to the annotations of the given case class T.

import io.github.setl.annotations.ColumnName
case class MyObject(@ColumnName("col1") column1: String, column2: String)

val ds: Dataset[MyObject] = Seq(MyObject("a", "A"), MyObject("b", "B")).toDS()
// +-------+-------+
// |column1|column2|
// +-------+-------+
// |      a|      A|
// |      b|      B|
// +-------+-------+

val df = SchemaConverter.toDF(ds)
// +----+-------+
// |col1|column2|
// +----+-------+
// |   a|      A|
// |   b|      B|
// +----+-------+

val ds2 = SchemaConverter.fromDF[MyObject](df)
// +-------+-------+
// |column1|column2|
// +-------+-------+
// |      a|      A|
// |      b|      B|
// +-------+-------+
Linear Supertypes
Logging, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. SchemaConverter
  2. Logging
  3. AnyRef
  4. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Value Members

  1. final def !=(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  4. final def asInstanceOf[T0]: T0

    Permalink
    Definition Classes
    Any
  5. def clone(): AnyRef

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  6. def compressColumn(structType: StructType)(dataFrame: DataFrame): DataFrame

    Permalink

    For each column annotated with @Compress(compressor), compress the column with the given compressor

    For each column annotated with @Compress(compressor), compress the column with the given compressor

    structType

    structType containing the meta-information of the source DataFrame

    dataFrame

    DataFrame to be compressed

    returns

    a new DataFrame with compressed column(s)

  7. def decompressColumn(structType: StructType)(dataFrame: DataFrame): DataFrame

    Permalink

    Decompress a DataFrame having compressed column(s)

    Decompress a DataFrame having compressed column(s)

    structType

    structType containing the meta-information of the target DataFrame

    dataFrame

    DataFrame to be decompressed

    returns

    a DataFrame with column(s) decompressed

  8. def dropCompoundKeyColumns(structType: StructType)(dataFrame: DataFrame): DataFrame

    Permalink

    Drop all compound key columns

  9. final def eq(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  10. def equals(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  11. def finalize(): Unit

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  12. def fromDF[T](dataFrame: DataFrame)(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Dataset[T]

    Permalink

    Convert a DataFrame to Dataset according to the annotations

    Convert a DataFrame to Dataset according to the annotations

    T

    type of dataset

    dataFrame

    input DataFrame

    Annotations
    @throws( ... )
  13. final def getClass(): Class[_]

    Permalink
    Definition Classes
    AnyRef → Any
  14. def hashCode(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  15. final def isInstanceOf[T0]: Boolean

    Permalink
    Definition Classes
    Any
  16. def log: Logger

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  17. def logDebug(msg: ⇒ String): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  18. def logError(msg: ⇒ String): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  19. def logInfo(msg: ⇒ String): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  20. def logName: String

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  21. def logTrace(msg: ⇒ String): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  22. def logWarning(msg: ⇒ String): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  23. final def ne(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  24. final def notify(): Unit

    Permalink
    Definition Classes
    AnyRef
  25. final def notifyAll(): Unit

    Permalink
    Definition Classes
    AnyRef
  26. def replaceDFColNameByFieldName(structType: StructType)(dataFrame: DataFrame): DataFrame

    Permalink

    import io.github.setl.annotations.ColumnName
    
    case class MyObject(@ColumnName("col1") column1: String, column2: String)
    
    convert
    +----+-------+
    |col1|column2|
    +----+-------+
    |   a|      A|
    |   b|      B|
    +----+-------+
    
    to
    +-------+-------+
    |column1|column2|
    +-------+-------+
    |      a|      A|
    |      b|      B|
    +-------+-------+
    structType

    StructType containing metadata of column name

    dataFrame

    the raw DataFrame loaded from a data persistence store

    returns

    a new DataFrame with renamed columns

  27. def replaceFieldNameByColumnName(structType: StructType)(dataFrame: DataFrame): DataFrame

    Permalink

    import io.github.setl.annotations.ColumnName
    
    case class MyObject(@ColumnName("col1") column1: String, column2: String)
    
    convert
    +-------+-------+
    |column1|column2|
    +-------+-------+
    |      a|      A|
    |      b|      B|
    +-------+-------+
    
    to
    +----+-------+
    |col1|column2|
    +----+-------+
    |   a|      A|
    |   b|      B|
    +----+-------+
    structType

    StructType containing metadata of column name

    dataFrame

    the DataFrame to be saved into a data persistence store

  28. final def synchronized[T0](arg0: ⇒ T0): T0

    Permalink
    Definition Classes
    AnyRef
  29. def toDF[T](dataset: Dataset[T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): DataFrame

    Permalink

    Convert a dataset to a DataFrame according to annotations

    Convert a dataset to a DataFrame according to annotations

    T

    type of dataset

    dataset

    input dataset

  30. def toString(): String

    Permalink
    Definition Classes
    AnyRef → Any
  31. final def wait(): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  32. final def wait(arg0: Long, arg1: Int): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  33. final def wait(arg0: Long): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )

Inherited from Logging

Inherited from AnyRef

Inherited from Any

Ungrouped