KSamplingBase

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
def defaultFeaturesCol: String

Definition Classes
KSamplingDefaults
def defaultFieldsToIgnore: Array[String]

Definition Classes
KSamplingDefaults
def defaultFill: Map[DataType, Any]

Definition Classes
KSamplingDefaults
def defaultHashTables: Int

Definition Classes
KSamplingDefaults
def defaultKGroups: Int

Definition Classes
KSamplingDefaults
def defaultKMeansDistanceMeasurement: String

Definition Classes
KSamplingDefaults
def defaultKMeansMaxIter: Int

Definition Classes
KSamplingDefaults
def defaultKMeansPredictionCol: String

Definition Classes
KSamplingDefaults
def defaultKMeansSeed: Long

Definition Classes
KSamplingDefaults
def defaultKMeansTolerance: Double

Definition Classes
KSamplingDefaults
def defaultLSHOutputCol: String

Definition Classes
KSamplingDefaults
def defaultLSHSeed: Long

Definition Classes
KSamplingDefaults
def defaultLabelCol: String

Definition Classes
KSamplingDefaults
def defaultMinimumVectorCountToMutate: Int

Definition Classes
KSamplingDefaults
def defaultMutationMode: String

Definition Classes
KSamplingDefaults
def defaultMutationValue: Double

Definition Classes
KSamplingDefaults
def defaultQuorumCount: Int

Definition Classes
KSamplingDefaults
def defaultSyntheticCol: String

Definition Classes
KSamplingDefaults
def defaultVectorMutationMethod: String

Definition Classes
KSamplingDefaults
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def getKSamplingConfig: KSamplingConfiguration

Public method for returning the current state of the configuration as a new instance of the KSamplingConfiguration
Public method for returning the current state of the configuration as a new instance of the KSamplingConfiguration
returns
the current state of the KSamplingConfiguration conf
def hashCode(): Int

Definition Classes
AnyRef → Any
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
lazy val sc: SparkContext

Definition Classes
SparkSessionWrapper
def setFeaturesCol(value: String): KSamplingBase.this.type

Setter for the Feature Column name of the input DataFrame
Setter for the Feature Column name of the input DataFrame
value
String: name of the feature vector column
returns
this
def setFieldsToIgnore(value: Array[String]): KSamplingBase.this.type

Setter to provide a listing of any fields that are intended to be ignored in the generated dataframe
Setter to provide a listing of any fields that are intended to be ignored in the generated dataframe
value
Array[String]: field names to ignore in the data generation aspect
returns
this
def setKGroups(value: Int): KSamplingBase.this.type

Setter for specifying the number of K-Groups to generate in the KMeans model
Setter for specifying the number of K-Groups to generate in the KMeans model
value
Int: number of k groups to generate
returns
this
def setKMeansDistanceMeasurement(value: String): KSamplingBase.this.type

Setter for which distance measurement to use to calculate the nearness of vectors to a centroid
Setter for which distance measurement to use to calculate the nearness of vectors to a centroid
value
String: Options -> "euclidean" or "cosine". Default: "euclidean"
returns
this

Annotations
@throws( classOf[IllegalArgumentException] )
Exceptions thrown
IllegalArgumentException() if an invalid value is entered
def setKMeansMaxIter(value: Int): KSamplingBase.this.type

Setter for specifying the maximum number of iterations for the KMeans model to go through to converge
Setter for specifying the maximum number of iterations for the KMeans model to go through to converge
value
Int: Maximum limit on iterations
returns
this
def setKMeansPredictionCol(value: String): KSamplingBase.this.type

Setter for the internal KMeans column for cluster membership attribution
Setter for the internal KMeans column for cluster membership attribution
value
String: column name for internal algorithm column for group membership
returns
this
def setKMeansSeed(value: Long): KSamplingBase.this.type

Setter for a KMeans seed for the clustering algorithm
Setter for a KMeans seed for the clustering algorithm
value
Long: Seed value
returns
this
def setKMeansTolerance(value: Double): KSamplingBase.this.type

Setter for the tolerance for KMeans (must be >0)
Setter for the tolerance for KMeans (must be >0)
value
The tolerance value setting for KMeans
returns
this

Annotations
@throws( classOf[IllegalArgumentException] )
Exceptions thrown
IllegalArgumentException() if a value less than 0 is entered
See also
reference: http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.ml.clustering.KMeans for further details.
def setLSHHashTables(value: Int): KSamplingBase.this.type

Setter for Configuring the number of Hash Tables to use for MinHashLSH
Setter for Configuring the number of Hash Tables to use for MinHashLSH
value
Int: Count of hash tables to use
returns
this

See also
http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.ml.feature.MinHashLSH for more information
def setLSHOutputCol(value: String): KSamplingBase.this.type

Setter for the internal LSH output hash information column
Setter for the internal LSH output hash information column
value
String: column name for the internal MinHashLSH Model transformation value
returns
this
def setLSHSeed(value: Long): KSamplingBase.this.type

Setter for a MinHashLSH seed value for the model.
Setter for a MinHashLSH seed value for the model.
value
Long: a seed value
returns
this
def setLabelCol(value: String): KSamplingBase.this.type

Setter for the Label Column name of the input DataFrame
Setter for the Label Column name of the input DataFrame
value
String: name of the label column
returns
this
def setMinimumVectorCountToMutate(value: Int): KSamplingBase.this.type

Setter for minimum threshold for vector indexes to mutate within the feature vector.
Setter for minimum threshold for vector indexes to mutate within the feature vector.
value
The minimum (or fixed) number of indexes to mutate.
returns
this

Note
In vectorMutationMethod "fixed" this sets the fixed count of how many vector positions to mutate. In vectorMutationMethod "random" this sets the lower threshold for 'at least this many indexes will be mutated'
def setMutationMode(value: String): KSamplingBase.this.type

Setter for the Mutation Mode of the feature vector individual values
Setter for the Mutation Mode of the feature vector individual values
value
String: the mode to use.
returns
this

Annotations
@throws( classOf[IllegalArgumentException] )
Exceptions thrown
IllegalArgumentException() if the mode is not supported.
Note
Options: "weighted" - uses weighted averaging to scale the euclidean distance between the centroid vector and mutation candidate vectors; "random" - randomly selects a position on the euclidean vector between the centroid vector and the candidate mutation vectors; "ratio" - uses a ratio between the values of the centroid vector and the mutation vector.
def setMutationValue(value: Double): KSamplingBase.this.type

Setter for specifying the mutation magnitude for the modes 'weighted' and 'ratio' in mutationMode
Setter for specifying the mutation magnitude for the modes 'weighted' and 'ratio' in mutationMode
value
Double: value between 0 and 1 for mutation magnitude adjustment.
returns
this

Annotations
@throws( classOf[IllegalArgumentException] )
Exceptions thrown
IllegalArgumentException() if the value specified is outside of the range (0, 1)
Note
the higher this value, the closer to the centroid vector vs. the candidate mutation vector the synthetic row data will be.
def setQuorumCount(value: Int): KSamplingBase.this.type

Setter for how many vectors to find in adjacency to the centroid for generation of synthetic data
Setter for how many vectors to find in adjacency to the centroid for generation of synthetic data
value
Int: Number of vectors to find nearest each centroid within the class
returns
this

Note
the higher the value set here, the higher the variance in synthetic data generation
def setSyntheticCol(value: String): KSamplingBase.this.type

Setter for the name to be used for the synthetic column flag that is attached to the output dataframe as an indication that the data present is generated and not original.
Setter for the name to be used for the synthetic column flag that is attached to the output dataframe as an indication that the data present is generated and not original.
value
String: name to be used throughout the job to delineate the fact that the data in the row is generated.
returns
this
def setVectorMutationMethod(value: String): KSamplingBase.this.type

Setter for the Vector Mutation Method
Setter for the Vector Mutation Method
value
String - the mode to use.
returns
this

Annotations
@throws( classOf[IllegalArgumentException] )
Exceptions thrown
IllegalArgumentException() if the mode is not supported.
Note
Options: "fixed" - will use the value of minimumVectorCountToMutate as the number of randomly selected indexes to mutate; "random" - will use the value of minimumVectorCountToMutate as a lower bound on a random selection of indexes between that value and the vector length; "all" - will mutate all of the vectors.
lazy val spark: SparkSession

Definition Classes
SparkSessionWrapper
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
AnyRef → Any
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

Related Doc: package feature

trait KSamplingBase extends KSamplingDefaults with SparkSessionWrapper

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: Any): Boolean

final def asInstanceOf[T0]: T0

def clone(): AnyRef

def defaultFeaturesCol: String

def defaultFieldsToIgnore: Array[String]

def defaultFill: Map[DataType, Any]

def defaultHashTables: Int

def defaultKGroups: Int

def defaultKMeansDistanceMeasurement: String

def defaultKMeansMaxIter: Int

def defaultKMeansPredictionCol: String

def defaultKMeansSeed: Long

def defaultKMeansTolerance: Double

def defaultLSHOutputCol: String

def defaultLSHSeed: Long

def defaultLabelCol: String

def defaultMinimumVectorCountToMutate: Int

def defaultMutationMode: String

def defaultMutationValue: Double

def defaultQuorumCount: Int

def defaultSyntheticCol: String

def defaultVectorMutationMethod: String

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

final def getClass(): Class[_]

def getKSamplingConfig: KSamplingConfiguration

def hashCode(): Int

final def isInstanceOf[T0]: Boolean

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

lazy val sc: SparkContext

def setFeaturesCol(value: String): KSamplingBase.this.type

def setFieldsToIgnore(value: Array[String]): KSamplingBase.this.type

def setKGroups(value: Int): KSamplingBase.this.type

def setKMeansDistanceMeasurement(value: String): KSamplingBase.this.type

def setKMeansMaxIter(value: Int): KSamplingBase.this.type

def setKMeansPredictionCol(value: String): KSamplingBase.this.type

def setKMeansSeed(value: Long): KSamplingBase.this.type

def setKMeansTolerance(value: Double): KSamplingBase.this.type

def setLSHHashTables(value: Int): KSamplingBase.this.type

def setLSHOutputCol(value: String): KSamplingBase.this.type

def setLSHSeed(value: Long): KSamplingBase.this.type

def setLabelCol(value: String): KSamplingBase.this.type

def setMinimumVectorCountToMutate(value: Int): KSamplingBase.this.type

def setMutationMode(value: String): KSamplingBase.this.type

def setMutationValue(value: Double): KSamplingBase.this.type

def setQuorumCount(value: Int): KSamplingBase.this.type

def setSyntheticCol(value: String): KSamplingBase.this.type

def setVectorMutationMethod(value: String): KSamplingBase.this.type

lazy val spark: SparkSession

final def synchronized[T0](arg0: ⇒ T0): T0

def toString(): String

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from SparkSessionWrapper

Inherited from Serializable

Inherited from Serializable

Inherited from KSamplingDefaults

Inherited from AnyRef

Inherited from Any

Ungrouped