Object

io.smartdatalake.workflow.action

ActionHelper

Related Doc: package action

Permalink

object ActionHelper extends SmartDataLakeLogger

Linear Supertypes
SmartDataLakeLogger, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. ActionHelper
  2. SmartDataLakeLogger
  3. AnyRef
  4. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Value Members

  1. final def !=(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  4. def applyAdditional(subFeed: SparkSubFeed, additional: (SparkSubFeed, Option[DataFrame], Seq[String], LocalDateTime) ⇒ SparkSubFeed, output: TableDataObject)(implicit session: SparkSession, context: ActionPipelineContext): SparkSubFeed

    Permalink

    applies an optional additional transformation

  5. def applyBlackWhitelists(subFeed: SparkSubFeed, columnBlacklist: Option[Seq[String]], columnWhitelist: Option[Seq[String]]): SparkSubFeed

    Permalink

    applies columnBlacklist and columnWhitelist

  6. def applyCastDecimal2IntegralFloat(subFeed: SparkSubFeed): SparkSubFeed

    Permalink

    applies type casting decimal -> integral/float

  7. def applyCustomTransformation(inputSubFeed: SparkSubFeed, transformer: Option[CustomDfTransformerConfig])(implicit session: SparkSession): SparkSubFeed

    Permalink

    applies the transformers

  8. def applyExecutionMode(executionMode: ExecutionMode, actionId: ActionObjectId, input: DataObject, output: DataObject, partitionValues: Seq[PartitionValues])(implicit session: SparkSession): Seq[PartitionValues]

    Permalink

    Apply execution mode to partition values

  9. def applyFilter(subFeed: SparkSubFeed, filterClauseExpr: Option[Column]): SparkSubFeed

    Permalink

    applies filterClauseExpr

  10. def applyTransformations(inputSubFeed: SparkSubFeed, transformer: Option[CustomDfTransformerConfig], columnBlacklist: Option[Seq[String]], columnWhitelist: Option[Seq[String]], standardizeDatatypes: Boolean, output: DataObject, additional: Option[(SparkSubFeed, Option[DataFrame], Seq[String], LocalDateTime) ⇒ SparkSubFeed], filterClauseExpr: Option[Column] = None)(implicit session: SparkSession, context: ActionPipelineContext): SparkSubFeed

    Permalink

    applies all the transformations above

  11. final def asInstanceOf[T0]: T0

    Permalink
    Definition Classes
    Any
  12. def checkDataFrameNotNewerThan(timestamp: LocalDateTime, df: DataFrame, tstmpColName: String)(implicit session: SparkSession): Unit

    Permalink

    Check plausibility of latest timestamp of a DataFrame vs. a given timestamp.

    Check plausibility of latest timestamp of a DataFrame vs. a given timestamp. Throws exception if not successful.

    timestamp

    to compare with

    df

    DataFrame to compare with

    tstmpColName

    the timestamp column of the dataframe

  13. def clone(): AnyRef

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  14. def dropDuplicates(pks: Seq[String])(df: DataFrame): DataFrame

    Permalink
  15. def enrichSubFeedDataFrame(input: DataObject with CanCreateDataFrame, subFeed: SparkSubFeed)(implicit session: SparkSession): SparkSubFeed

    Permalink

    Enriches SparkSubFeed with DataFrame if not existing

    Enriches SparkSubFeed with DataFrame if not existing

    input

    input data object.

    subFeed

    input SubFeed.

  16. final def eq(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  17. def equals(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  18. def filterBlacklist(columnBlacklist: Seq[String])(df: DataFrame): DataFrame

    Permalink

    Remove all columns in blacklist from a DataFrame.

    Remove all columns in blacklist from a DataFrame.

    columnBlacklist

    columns to remove

    df

    DataFrame to be filtered

    returns

    DataFrame with all columns in blacklist removed

  19. def filterDataFrame(df: DataFrame, partitionValues: Seq[PartitionValues]): DataFrame

    Permalink

    Filter DataFrame with given partition values

    Filter DataFrame with given partition values

    df

    DataFrame to filter

    partitionValues

    partition values to use as filter condition

    returns

    filtered DataFrame

  20. def filterWhitelist(columnWhitelist: Seq[String])(df: DataFrame): DataFrame

    Permalink

    Removes all columns from a DataFrame except those specified in whitelist.

    Removes all columns from a DataFrame except those specified in whitelist.

    columnWhitelist

    columns to keep

    df

    DataFrame to be filtered

    returns

    DataFrame with all columns removed except those specified in whitelist

  21. def finalize(): Unit

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  22. final def getClass(): Class[_]

    Permalink
    Definition Classes
    AnyRef → Any
  23. def hashCode(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  24. final def isInstanceOf[T0]: Boolean

    Permalink
    Definition Classes
    Any
  25. lazy val logger: Logger

    Permalink
    Attributes
    protected
    Definition Classes
    SmartDataLakeLogger
  26. def multiTransformSubfeed(subFeed: SparkSubFeed, transformers: Seq[(DataFrame) ⇒ DataFrame]): SparkSubFeed

    Permalink

    applies multiple transformations to a single subfeed

  27. def multiTransformSubfeeds(subFeeds: Seq[SparkSubFeed], transformers: Seq[(DataFrame) ⇒ DataFrame]): Seq[SparkSubFeed]

    Permalink

    applies multiple transformations to a sequence of subfeeds

  28. final def ne(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  29. final def notify(): Unit

    Permalink
    Definition Classes
    AnyRef
  30. final def notifyAll(): Unit

    Permalink
    Definition Classes
    AnyRef
  31. def replaceSpecialCharactersWithUnderscore(str: String): String

    Permalink

    Replace all special characters in a String with underscore. Used to get valid temp view names.

  32. def searchCommonInits(partitions1: Seq[String], partitions2: Seq[String]): Seq[Seq[String]]

    Permalink

    search common inits between two partition column definitions

  33. def searchGreatestCommonInit(partitions1: Seq[String], partitions2: Seq[String]): Option[Seq[String]]

    Permalink

    search greatest common init between two partition column definitions

  34. final def synchronized[T0](arg0: ⇒ T0): T0

    Permalink
    Definition Classes
    AnyRef
  35. def toString(): String

    Permalink
    Definition Classes
    AnyRef → Any
  36. def transformSubfeeds(subFeeds: Seq[SparkSubFeed], transformer: (DataFrame) ⇒ DataFrame): Seq[SparkSubFeed]

    Permalink

    transform sequence of subfeeds

  37. def ts1(t: LocalDateTime): Column

    Permalink

    create a util literal Column from a given LocalDateTime

  38. def validateAndUpdateSubFeedPartitionValues(output: DataObject, subFeed: SparkSubFeed): SparkSubFeed

    Permalink

    Updates the partition values of a SubFeed to the partition columns of an output, removing not existing columns from the partition values.

    Updates the partition values of a SubFeed to the partition columns of an output, removing not existing columns from the partition values. Further the transformed DataFrame is validated to have the output's partition columns included.

    output

    output DataObject

    subFeed

    SubFeed with transformed DataFrame

    returns

    SubFeed with updated partition values.

  39. def validateDataFrameContainsCols(df: DataFrame, columns: Seq[String], debugName: String): Unit

    Permalink

    Validate that DataFrame contains a given list of columns, throwing an exception otherwise.

    Validate that DataFrame contains a given list of columns, throwing an exception otherwise.

    df

    DataFrame to validate

    columns

    Columns that must exist in DataFrame

    debugName

    name to mention in exception

  40. final def wait(): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  41. final def wait(arg0: Long, arg1: Int): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  42. final def wait(arg0: Long): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )

Inherited from SmartDataLakeLogger

Inherited from AnyRef

Inherited from Any

Ungrouped