object SchemaConverter extends Logging
SchemaConverter renames the columns of a Dataset/DataFrame according to the annotations of the given case class T.
import io.github.setl.annotations.ColumnName

case class MyObject(@ColumnName("col1") column1: String, column2: String)

val ds: Dataset[MyObject] = Seq(MyObject("a", "A"), MyObject("b", "B")).toDS()
// +-------+-------+
// |column1|column2|
// +-------+-------+
// |      a|      A|
// |      b|      B|
// +-------+-------+

val df = SchemaConverter.toDF(ds)
// +----+-------+
// |col1|column2|
// +----+-------+
// |   a|      A|
// |   b|      B|
// +----+-------+

val ds2 = SchemaConverter.fromDF[MyObject](df)
// +-------+-------+
// |column1|column2|
// +-------+-------+
// |      a|      A|
// |      b|      B|
// +-------+-------+
- Alphabetic
- By Inheritance
- SchemaConverter
- Logging
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
compressColumn(structType: StructType)(dataFrame: DataFrame): DataFrame
For each column annotated with @Compress(compressor), compress the column with the given compressor
For each column annotated with @Compress(compressor), compress the column with the given compressor
- structType
structType containing the meta-information of the source DataFrame
- dataFrame
DataFrame to be compressed
- returns
a new DataFrame with compressed column(s)
-
def
decompressColumn(structType: StructType)(dataFrame: DataFrame): DataFrame
Decompress a DataFrame having compressed column(s)
Decompress a DataFrame having compressed column(s)
- structType
structType containing the meta-information of the target DataFrame
- dataFrame
DataFrame to be decompressed
- returns
a DataFrame with column(s) decompressed
-
def
dropCompoundKeyColumns(structType: StructType)(dataFrame: DataFrame): DataFrame
Drop all compound key columns
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
def
fromDF[T](dataFrame: DataFrame)(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): Dataset[T]
Convert a DataFrame to Dataset according to the annotations
Convert a DataFrame to Dataset according to the annotations
- T
type of dataset
- dataFrame
input DataFrame
- Annotations
- @throws( ... )
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
def
log: Logger
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logName: String
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
replaceDFColNameByFieldName(structType: StructType)(dataFrame: DataFrame): DataFrame
import io.github.setl.annotations.ColumnName

case class MyObject(@ColumnName("col1") column1: String, column2: String)

convert

+----+-------+
|col1|column2|
+----+-------+
|   a|      A|
|   b|      B|
+----+-------+

to

+-------+-------+
|column1|column2|
+-------+-------+
|      a|      A|
|      b|      B|
+-------+-------+
- structType
StructType containing metadata of column name
- dataFrame
the raw DataFrame loaded from a data persistence store
- returns
a new DataFrame with renamed columns
-
def
replaceFieldNameByColumnName(structType: StructType)(dataFrame: DataFrame): DataFrame
import io.github.setl.annotations.ColumnName

case class MyObject(@ColumnName("col1") column1: String, column2: String)

convert

+-------+-------+
|column1|column2|
+-------+-------+
|      a|      A|
|      b|      B|
+-------+-------+

to

+----+-------+
|col1|column2|
+----+-------+
|   a|      A|
|   b|      B|
+----+-------+
- structType
StructType containing metadata of column name
- dataFrame
the DataFrame to be saved into a data persistence store
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toDF[T](dataset: Dataset[T])(implicit arg0: scala.reflect.api.JavaUniverse.TypeTag[T]): DataFrame
Convert a dataset to a DataFrame according to annotations
Convert a dataset to a DataFrame according to annotations
- T
type of dataset
- dataset
input dataset
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()