StatsCompute

Instance Constructors

new StatsCompute(inputDf: DataFrame, keys: Seq[String], name: String)

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def addDerivedMetrics(df: DataFrame, aggregator: RowAggregator): DataFrame

Given a summary Dataframe that computed the stats.
Given a summary DataFrame containing the computed stats, adds derived data (for example: null rate, median, etc.).
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
def dailySummary(aggregator: RowAggregator, sample: Double = 1.0, timeBucketMinutes: Long = 60): TimedKvRdd

Navigate the dataframe and compute statistics partitioned by date stamp
Navigate the dataframe and compute statistics partitioned by date stamp
Day-partitioned version of the normalized summary. Useful for scheduling a job that computes daily stats. Returns a KvRdd that can be pushed into a KvStore for fetching and merging, as well as a DataFrame for storing in Hive.
For entities on the left, we use the daily partition as the key. For events, we bucket by timeBucketMinutes (default: 60 minutes, i.e. 1 hour). Since the stats are mergeable, coarser granularities can be obtained through fetcher merging.
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def hashCode(): Int

Definition Classes
AnyRef → Any
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
val metrics: Seq[MetricTransform]
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
lazy val selectedDf: DataFrame
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
implicit val tableUtils: TableUtils
val timeColumns: Seq[String]
def toString(): String

Definition Classes
AnyRef → Any
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

Related Doc: package stats

class StatsCompute extends Serializable

Instance Constructors

new StatsCompute(inputDf: DataFrame, keys: Seq[String], name: String)

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: Any): Boolean

def addDerivedMetrics(df: DataFrame, aggregator: RowAggregator): DataFrame

final def asInstanceOf[T0]: T0

def clone(): AnyRef

def dailySummary(aggregator: RowAggregator, sample: Double = 1.0, timeBucketMinutes: Long = 60): TimedKvRdd

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

final def getClass(): Class[_]

def hashCode(): Int

final def isInstanceOf[T0]: Boolean

val metrics: Seq[MetricTransform]

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

lazy val selectedDf: DataFrame

final def synchronized[T0](arg0: ⇒ T0): T0

implicit val tableUtils: TableUtils

val timeColumns: Seq[String]

def toString(): String

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from Serializable

Inherited from Serializable

Inherited from AnyRef

Inherited from Any

Ungrouped