o

io.github.setl.internal

SchemaConverter

object SchemaConverter extends Logging

SchemaConverter will rename the column of a dataset/dataframe according to the given case class T.

import io.github.setl.annotations.ColumnName
case class MyObject(@ColumnName("col1") column1: String, column2: String)

val ds: Dataset[MyObject] = Seq(MyObject("a", "A"), MyObject("b", "B")).toDS()
// +-------+-------+
// |column1|column2|
// +-------+-------+
// |      a|      A|
// |      b|      B|
// +-------+-------+

val df = SchemaConverter.toDF(ds)
// +----+-------+
// |col1|column2|
// +----+-------+
// |   a|      A|
// |   b|      B|
// +----+-------+

val ds2 = SchemaConverter.fromDF[MyObject](df)
// +-------+-------+
// |column1|column2|
// +-------+-------+
// |      a|      A|
// |      b|      B|
// +-------+-------+
Linear Supertypes
Logging, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. SchemaConverter
  2. Logging
  3. AnyRef
  4. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  5. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  6. def compressColumn(structType: StructType)(dataFrame: DataFrame): DataFrame

    For each column annotated with @Compress(compressor), compress the column with the given compressor

    For each column annotated with @Compress(compressor), compress the column with the given compressor

    structType

    structType containing the meta-information of the source DataFrame

    dataFrame

    DataFrame to be compressed

    returns

    a new DataFrame with compressed column(s)

  7. def decompressColumn(structType: StructType)(dataFrame: DataFrame): DataFrame

    Decompress a DataFrame having compressed column(s)

    Decompress a DataFrame having compressed column(s)

    structType

    structType containing the meta-information of the target DataFrame

    dataFrame

    DataFrame to be decompressed

    returns

    a DataFrame with column(s) decompressed

  8. def dropCompoundKeyColumns(structType: StructType)(dataFrame: DataFrame): DataFrame

    Drop all compound key columns

  9. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  10. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  11. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  12. def fromDF[T](dataFrame: DataFrame)(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Dataset[T]

    Convert a DataFrame to Dataset according to the annotations

    Convert a DataFrame to Dataset according to the annotations

    T

    type of dataset

    dataFrame

    input DataFrame

    Annotations
    @throws( ... )
  13. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  14. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  15. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  16. def log: Logger
    Attributes
    protected
    Definition Classes
    Logging
  17. def logDebug(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  18. def logError(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  19. def logInfo(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  20. def logName: String
    Attributes
    protected
    Definition Classes
    Logging
  21. def logTrace(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  22. def logWarning(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  23. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  24. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  25. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  26. def replaceDFColNameByFieldName(structType: StructType)(dataFrame: DataFrame): DataFrame

    import io.github.setl.annotations.ColumnName
    
    case class MyObject(@ColumnName("col1") column1: String, column2: String)
    
    convert
    +----+-------+
    |col1|column2|
    +----+-------+
    |   a|      A|
    |   b|      B|
    +----+-------+
    
    to
    +-------+-------+
    |column1|column2|
    +-------+-------+
    |      a|      A|
    |      b|      B|
    +-------+-------+
    structType

    StructType containing metadata of column name

    dataFrame

    the raw DataFrame loaded from a data persistence store

    returns

    a new DataFrame with renamed columns

  27. def replaceFieldNameByColumnName(structType: StructType)(dataFrame: DataFrame): DataFrame

    import io.github.setl.annotations.ColumnName
    
    case class MyObject(@ColumnName("col1") column1: String, column2: String)
    
    convert
    +-------+-------+
    |column1|column2|
    +-------+-------+
    |      a|      A|
    |      b|      B|
    +-------+-------+
    
    to
    +----+-------+
    |col1|column2|
    +----+-------+
    |   a|      A|
    |   b|      B|
    +----+-------+
    structType

    StructType containing metadata of column name

    dataFrame

    the DataFrame to be saved into a data persistence store

  28. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  29. def toDF[T](dataset: Dataset[T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): DataFrame

    Convert a dataset to a DataFrame according to annotations

    Convert a dataset to a DataFrame according to annotations

    T

    type of dataset

    dataset

    input dataset

  30. def toString(): String
    Definition Classes
    AnyRef → Any
  31. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  32. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  33. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()

Inherited from Logging

Inherited from AnyRef

Inherited from Any

Ungrouped