DataSplitUtility

Instance Constructors

new DataSplitUtility(mainDataset: DataFrame, kIterations: Int, splitMethod: String, labelColumn: String, rootDir: String, persistMode: String, modelFamily: String, parallelism: Int, trainPortion: Double, syntheticCol: String, trainSplitChronologicalColumn: String, trainSplitChronologicalRandomPercentage: Double, reductionFactor: Double)

mainDataset
Dataset that contains the feature vector, output from the DataPrep phase, ready to be split into train and test sets
kIterations
Number of 'copies' of the split to perform, matching the number of kFold models to be built
splitMethod
The type of split being performed (e.g. 'stratified', 'random', 'kSample')
labelColumn
Name of the label column
rootDir
Source directory to use to build the delta persisted data sets if using 'delta' mode in persistMode
persistMode
'cache', 'persist' or 'delta' - how to retain each of the kFold train/test splits.
modelFamily
The model family, used to determine how many partitions the train and test splits should be repartitioned into for optimal performance.

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
def formRootPath(configStoreLocation: String): String

Definition Classes
SplitUtilityTooling
def formTrainTestPaths(configStoreLocation: String): TrainTestPaths

Definition Classes
SplitUtilityTooling
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def hashCode(): Int

Definition Classes
AnyRef → Any
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def performSplit: Array[TrainSplitReferences]

Wrapper interface for performing the splits, dependent on mode
Wrapper interface for performing the splits, dependent on mode
returns
Array[TrainSplitReferences] from the above methods.
lazy val sc: SparkContext

Definition Classes
SparkSessionWrapper
lazy val spark: SparkSession

Definition Classes
SparkSessionWrapper
def storeLoadDelta(trainData: DataFrame, testData: DataFrame, paths: TrainTestPaths): TrainTestData

Definition Classes
SplitUtilityTooling
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
AnyRef → Any
final val uniqueLabels: Array[Row]
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

Related Docs: object DataSplitUtility | package split

class DataSplitUtility extends SplitUtilityTooling

Instance Constructors

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: Any): Boolean

final def asInstanceOf[T0]: T0

def clone(): AnyRef

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

def formRootPath(configStoreLocation: String): String

def formTrainTestPaths(configStoreLocation: String): TrainTestPaths

final def getClass(): Class[_]

def hashCode(): Int

final def isInstanceOf[T0]: Boolean

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

def performSplit: Array[TrainSplitReferences]

lazy val sc: SparkContext

lazy val spark: SparkSession

def storeLoadDelta(trainData: DataFrame, testData: DataFrame, paths: TrainTestPaths): TrainTestData

final def synchronized[T0](arg0: ⇒ T0): T0

def toString(): String

final val uniqueLabels: Array[Row]

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from SplitUtilityTooling

Inherited from SparkSessionWrapper

Inherited from Serializable

Inherited from Serializable

Inherited from AnyRef

Inherited from Any

Ungrouped