com.databricks.labs.automl.executor.config
The model family to run (e.g. 'RandomForest'). Allowable options: "Trees", "GBT", "RandomForest", "LinearRegression", "LogisticRegression", "XGBoost", "MLPC", "SVM"
The modeling type to run (e.g. 'classifier'). Allowable options: "classifier" or "regressor"
Configuration object from GenericConfigGenerator
Static restrictions
Boolean switch for setting Auto Stopping Off
Default: Off
Boolean switch for setting Auto Stopping On
Early stopping will invalidate the progress measurement system (due to non-determinism). Early termination will not occur immediately: Futures already committed will continue to run, but no new actions will be enqueued once a stopping criterion is met.
Default: Off
Setter switch for turning the cardinality check off.
0.5.2
Default: true
Not recommended for exploratory data set features.
Setter switch for turning the cardinality check on. This switch sets whether a cardinality check is performed on StringIndexed columns.
0.5.2
Default: true
Boolean switch for turning Covariance filtering off
Default: Off
Boolean switch for turning Covariance filtering on
Default: Off
Boolean switch for setting Data Prep Caching Off
Depending on the size and partitioning of the data set, caching may or may not improve performance.
Default: On
Boolean switch for setting Data Prep Caching On
Depending on the size and partitioning of the data set, caching may or may not improve performance.
Default: On
Boolean switch for turning featureInteraction off
0.6.2
Boolean switch for setting featureInteraction on. This setting will, in conjunction with the featureInteraction elements in the config, perform pair-wise product interactions of all elements of the feature vector, retaining either all or some of those interactions for inclusion in the feature vector. For classification tasks, InformationGain is used as the inclusion metric (for modes other than 'all'); for regression tasks, Variance is used.
0.6.2
Configuration object from GenericConfigGenerator
Getters
Boolean switch for turning off naFill actions
HIGHLY RECOMMENDED TO NOT TURN OFF
Default: On
Boolean switch for turning on naFill actions
HIGHLY RECOMMENDED TO LEAVE ON.
Default: On
Boolean switch for turning off One Hot Encoding
Default: Off for Tree based algorithms, On for all others.
Boolean switch for turning One Hot Encoding of string and character features on
Turning One Hot Encoding on for a tree-based algorithm (XGBoost, RandomForest, Trees, GBT) is not recommended. Introducing synthetic dummy variables in a tree algorithm will force the creation of sparse tree splits.
Default: Off for Tree based algorithms, On for all others.
See https://towardsdatascience.com/one-hot-encoding-is-making-your-tree-based-ensembles-worse-heres-why-d64b282b5769 for a full explanation.
Boolean switch for turning outlier filtering off
Default: Off
Boolean switch for turning outlier filtering on
Default: Off
Boolean switch for turning Pearson filtering off
Default: Off
Boolean switch for turning Pearson filtering on
Default: Off
Boolean switch for turning scaling Off
Default: Off for Tree based algorithms, On for all others.
Boolean switch for turning scaling On
For Tree based algorithms (RandomForest, XGBoost, GBT, Trees), scaling is not necessary and can adversely affect model performance.
Default: Off for Tree based algorithms, On for all others.
Boolean switch for setting the state of autoStoppingFlag
Boolean
Helper method for copying a pre-defined InstanceConfig to a new instance.
InstanceConfig object
Setter
Covariance Cutoff for specifying the feature-to-feature correlation statistic upper cutoff boundary
Double: Threshold Cutoff Value
For feature columns A, B, and C, if A<->B is 0.02, A<->C is 0.1, and B<->C is 0.85, with a cutoff value of 0.8, column C would be removed from the feature vector for having a high correlation statistic.
IllegalArgumentException
if the value is <= -1.0
WARNING: This setting is not recommended for production use cases and is only potentially useful for data exploration and experimentation.
Default: 0.99
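The A/B/C example above can be sketched in pure Scala. This is an illustrative sketch of the filtering idea, not the toolkit's internal implementation; the object and method names are made up for the example.

```scala
object CovarianceFilterSketch {

  // Pearson correlation coefficient between two equal-length numeric series
  def pearson(a: Seq[Double], b: Seq[Double]): Double = {
    val n = a.length
    val ma = a.sum / n
    val mb = b.sum / n
    val cov = a.zip(b).map { case (x, y) => (x - ma) * (y - mb) }.sum
    val sa = math.sqrt(a.map(x => (x - ma) * (x - ma)).sum)
    val sb = math.sqrt(b.map(y => (y - mb) * (y - mb)).sum)
    cov / (sa * sb)
  }

  // Walk feature pairs left-to-right; drop the right-hand column of any pair
  // whose correlation exceeds the high cutoff (mirrors the A/B/C example).
  def filterFeatures(features: Map[String, Seq[Double]],
                     cutoffHigh: Double): Seq[String] = {
    val names = features.keys.toSeq.sorted
    val dropped = scala.collection.mutable.Set.empty[String]
    for {
      i <- names.indices
      j <- (i + 1) until names.length
      if !dropped(names(i)) && !dropped(names(j))
    } if (pearson(features(names(i)), features(names(j))) > cutoffHigh)
      dropped += names(j)
    names.filterNot(dropped)
  }
}
```

With B and C perfectly correlated and a cutoff of 0.8, C is culled while A and B survive, matching the doc's example.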
Setter
Covariance Cutoff for specifying the feature-to-feature correlation statistic lower cutoff boundary
Double: Threshold Cutoff Value
For feature columns A, B, and C, if A<->B is 0.02, A<->C is 0.1, and B<->C is 0.85, with a cutoff value of 0.05, column A would be removed from the feature vector for having a low correlation statistic.
IllegalArgumentException
if the value is <= -1.0
WARNING: The lower threshold boundary for correlation is less frequently used. Filtering of auto-correlated features is done primarily through .setCovarianceCutoffHigh values lower than the default of 0.99.
WARNING: This setting is not recommended for production use cases and is only potentially useful for data exploration and experimentation.
Default: -0.99
Boolean switch for setting the state of covarianceFilterFlag
Boolean
Boolean switch for setting the state of DataPrepCachingFlag
Boolean
Setter for defining the number of concurrent threads allocated to performing asynchronous data prep tasks within the feature engineering aspect of this application.
Int: A value that must be greater than zero.
0.6.0
IllegalArgumentException
if a value less than or equal to zero is supplied.
This value has an upper limit, depending on driver size, that will restrict the efficacy of the asynchronous tasks within the pool. Setting this too high may cause cluster instability.
Setter for determining the behavior of continuous feature columns. In order to calculate Entropy for a continuous variable, the distribution must be converted to nominal values for estimation of per-split information gain. This setting defines how many nominal categorical values to create out of a continuously distributed feature in order to calculate Entropy.
Int -> must be greater than 1
0.6.2
IllegalArgumentException
if the value specified is <= 1
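The discretization described above can be sketched as follows: bucket the continuous values into a fixed number of equal-width bins, then compute Shannon entropy over the resulting nominal assignments. A minimal sketch with illustrative names, not the toolkit's own binning strategy:

```scala
object EntropyBinningSketch {

  // Discretize a continuous feature into `bins` equal-width buckets.
  // Assumes the values are not all identical (width > 0).
  def discretize(values: Seq[Double], bins: Int): Seq[Int] = {
    require(bins > 1, "bucket count must be > 1")
    val lo = values.min
    val width = (values.max - lo) / bins
    values.map(v => math.min(((v - lo) / width).toInt, bins - 1))
  }

  // Shannon entropy (base 2) over the nominal bucket assignments
  def entropy(buckets: Seq[Int]): Double = {
    val n = buckets.size.toDouble
    buckets.groupBy(identity).values.map { g =>
      val p = g.size / n
      -p * math.log(p) / math.log(2)
    }.sum
  }
}
```

A uniform spread over four buckets yields the maximum entropy of 2 bits, while a skewed distribution yields less.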
Setter for defining the state of the featureInteractionFlag
Boolean on/off
0.6.2
Setter for configuring the concurrent count for scoring of feature interaction candidates. Due to the nature of these operations, the configuration here may need to be set differently from that of the modeling and general feature engineering phases of the toolkit. This is highly dependent on the row count of the data set being submitted.
Int -> must be greater than 0
0.6.2
IllegalArgumentException
if the value is < 1
Setter for determining the mode of operation for inclusion of interacted features.
String -> one of: 'all', 'optimistic', or 'strict'
0.6.2
IllegalArgumentException
if the value submitted is not permitted
Setter for establishing the minimum acceptable InformationGain or Variance allowed for an interaction candidate, based on comparison to the scores of its parents.
Double in range of -inf -> inf
0.6.2
Setter for the cardinality check mode to be used. Available modes are "warn" and "silent". In "warn" mode, an exception will be thrown if the cardinality for a categorical column is above the threshold. In "silent" mode, the field will be ignored from processing and will not be included in the feature vector.
String: either "warn" or "silent"
0.5.2
IllegalArgumentException
if the mode supplied is not either "warn" or "silent"
Default: "silent"
Setter for overriding the default cardinality limit when validating whether a field should be considered for OneHotEncoding or StringIndexing
Int: The value above which a field will be declared to be of too high a cardinality for StringIndexing or OneHotEncoding
0.5.2
java.lang.IllegalArgumentException
if the number is <= 0
Default: 200
Setter for defining the precision calculation when in "approx" mode for cardinalityType. Must be in range 0 -> 1.
Double: The precision for approximate distinct calculations for cardinality purposes
0.5.2
java.lang.IllegalArgumentException
if the Double supplied is outside of the range of 0 -> 1
Setter for direct override of the cardinality switch
0.5.2
Default: true
Setter for specifying the mode of cardinality checking (either "approx" for approximate distinct or "exact")
String: either "approx" or "exact"
0.5.2
IllegalArgumentException
if a mode other than "exact" or "approx" is specified.
Default: "exact"
Setter for providing a map of [Column Name -> String Fill Value] for manual by-column overrides. Any non-specified fields in this map will utilize the "auto" statistics-based fill paradigm to calculate and fill any NA values in non-numeric columns.
Map[String, String]: Column Name as String -> Fill Value as String
0.5.2
If fields are specified here that are not part of the DataFrame's schema, an exception will be thrown.
If naFillMode is specified as using Map Fill modes, this setter or the numeric na fill map MUST be set.
Setter
Specifies the behavior of the naFill algorithm for character (String, Char, Boolean, Byte, etc.) fields.
Generated through a df.summary() method. Available options are: "min" (least frequently occurring value) or "max" (most frequently occurring value)
String: member of allowable list
IllegalArgumentException
if an invalid entry is made.
Default: "max"
Setter for providing a 'blanket override' value (fill all found categorical columns' missing values with this specified value).
String: A value to fill all categorical na values in the DataFrame with.
0.5.2
Setter for defining the precision for calculating the model type as per the label column
Double: Precision accuracy for approximate distinct calculation.
0.5.2
java.lang.AssertionError
if the value is outside of the allowable range of 0 -> 1
Setting this value to zero (0) for a large regression problem will incur a long processing time and an expensive shuffle.
Mode for na fill
Available modes:
auto: Stats-based na fill for fields. Usage of .setNumericFillStat and .setCharacterFillStat will inform the type of statistics that will be used to fill.
mapFill: Custom by-column overrides to 'blanket fill' na values on a per-column basis. The categorical (string) fields are set via .setCategoricalNAFillMap while the numeric fields are set via .setNumericNAFillMap.
blanketFillAll: Fills all fields based on the values specified by .setCharacterNABlanketFillValue and .setNumericNABlanketFillValue. All NA's for the appropriate types will be filled in accordingly throughout all columns.
blanketFillCharOnly: Will use statistics to fill in numeric fields, but will replace all categorical character fields' na values with a blanket fill value.
blanketFillNumOnly: Will use statistics to fill in character fields, but will replace all numeric fields' na values with a blanket value.
String: Mode for NA Fill
0.5.2
IllegalArgumentException
if the mode specified is not supported.
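The difference between the stats-based ("auto") and blanket-fill modes above can be sketched on a single numeric column. This is a minimal illustration with made-up names, not the toolkit's fill implementation:

```scala
object NaFillSketch {

  // "auto"-style fill: compute a statistic (the mean here, matching the
  // default numeric fill stat) from the present values and fill NAs with it
  def fillWithMean(col: Seq[Option[Double]]): Seq[Double] = {
    val present = col.flatten
    val mean = present.sum / present.size
    col.map(_.getOrElse(mean))
  }

  // "blanketFillAll"-style fill: every missing value receives the same
  // caller-supplied override value
  def blanketFill(col: Seq[Option[Double]], value: Double): Seq[Double] =
    col.map(_.getOrElse(value))
}
```

For the column [1.0, NA, 3.0], the stats-based fill produces 2.0 (the mean) while a blanket fill of -1.0 produces -1.0 in the same slot.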
Setter
Specifies the behavior of the naFill algorithm for numeric (continuous) fields. Values that are generated as potential fill candidates are set according to the available statistics that are calculated from a df.summary() method. Available options are: "min", "25p", "mean", "median", "75p", or "max"
String: member of allowable list.
IllegalArgumentException
if an invalid entry is made.
Default: "mean"
Setter for providing a 'blanket override' value (fill all found numeric columns' missing values with this specified value)
Double: A value to fill all numeric na values in the DataFrame with.
0.5.2
Setter for providing a map of [Column Name -> AnyVal Fill Value] (must be numeric). Any non-specified fields in this map will utilize the "auto" statistics-based fill paradigm to calculate and fill any NA values in numeric columns.
Map[String, AnyVal]: Column Name as String -> Fill Numeric Type Value
0.5.2
If fields are specified here that are not part of the DataFrame's schema, an exception will be thrown.
If naFillMode is specified as using Map Fill modes, this setter or the categorical na fill map MUST be set.
Setter
Allows for setting a series of custom mlflow logging tags to an experiment run (universal across all iterations and models of the run) to be logged in mlflow as custom tag key-value pairs
Array of Map[String -> AnyVal]
The mapped values can be of types: Double, Float, Long, Int, Short, Byte, Boolean, or String
MLFlow Logging Config
Boolean switch for setting the state of naFillFlag
Boolean (whether to execute filling of na values on the DataFrame's non-ignored fields)
Boolean switch for setting the state of oneHotEncodeFlag
Boolean
Setter
Defines the determination of whether to classify a numeric field as ordinal (categorical) or continuous.
Int: Threshold for distinct counts within a numeric feature field.
Continuous data fields are eligible for outlier filtering. Categorical fields are not; fields whose cardinality falls below the threshold set by this value will be ignored by the filtering action.
Setter
Defines an Array of fields to be ignored from outlier filtering.
Array[String]: field names to be ignored from outlier filtering.
Setter
Configures the tails of a distribution to filter out, along with the ntile settings defined in .setOutlierLowerFilterNTile() and/or .setOutlierUpperFilterNTile()
Available Modes:
"lower" -> filters out rows from the data that are below the value set in .setOutlierLowerFilterNTile()
"upper" -> filters out rows from the data that are above the value set in .setOutlierUpperFilterNTile()
"both" -> two-tailed filter that combines both an "upper" and "lower" filter.
String: Tailed direction setting for outlier filtering.
This filter action is disabled by default. Before enabling, please ensure the fields to be filtered are adequately reflected in the .setOutlierFieldsToIgnore() inverse selection, and verify the general distribution of the fields that have outlier data in order to select an appropriate NTile value. This feature should only be used in rare instances, and the impacts that this filter may have should be fully understood before enabling it.
Default: "both"
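The two-tailed NTile filtering described above can be sketched in pure Scala: compute the lower and upper quantile boundaries of a column, then keep only rows inside them. A minimal sketch using a nearest-rank quantile; names and the quantile method are illustrative, not the toolkit's internals:

```scala
object OutlierFilterSketch {

  // Nearest-rank quantile of a numeric column
  def quantile(values: Seq[Double], q: Double): Double = {
    val sorted = values.sorted
    val idx = math.ceil(q * sorted.size).toInt - 1
    sorted(math.max(idx, 0))
  }

  // "both" mode: keep rows whose value lies within [lowerNTile, upperNTile]
  def filterBoth(values: Seq[Double], lower: Double, upper: Double): Seq[Double] = {
    val lo = quantile(values, lower)
    val hi = quantile(values, upper)
    values.filter(v => v >= lo && v <= hi)
  }
}
```

With values 1 through 10 plus an outlier of 1000, a 0.05/0.9 NTile pair keeps 1 through 10 and drops the outlier.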
Boolean switch for setting the state of outlierFilterFlag
Boolean
Setter
Defines the precision (RSD) with which each field's cardinality is calculated through the use of the SparkSQL function approx_count_distinct. Lower values specify higher accuracy, but consume more computational resources.
Double: In range of 0.0, 1.0
IllegalArgumentException
if the value supplied is outside of the Range(0.0, 1.0)
A value of 0.0 will be an exact computation of distinct values. Therefore, all data must be shuffled, which is an expensive task.
See https://en.wikipedia.org/wiki/Coefficient_of_variation for an explanation of RSD.
Setter
Defines the NTILE value of the distributions of feature fields below which rows will be filtered from the data.
Double: Lower Threshold boundary NTILE for Outlier Filtering
IllegalArgumentException
if the value supplied is outside of the Range(0.0, 1.0)
Only used if Outlier filtering is set to 'On' and Filter Direction is either 'both' or 'lower'
Setter
Defines the NTILE value of the distributions of feature fields above which rows will be filtered from the data.
Double: Upper Threshold boundary NTILE value for Outlier Filtering
IllegalArgumentException
if the value supplied is outside of the Range(0.0, 1.0)
Only used if Outlier filtering is set to 'On' and Filter Direction is either 'both' or 'upper'
Setter
Provides the ntile threshold above or below which (depending on the PearsonFilterDirection setting) fields will be removed, depending on the distribution of pearson statistics from all feature columns.
Double: In range of (0.0, 1.0)
IllegalArgumentException
if the value provided is outside of the range of (0.0, 1.0)
Default: 0.75 (Q3)
WARNING: This feature is ONLY recommended for exploratory development work.
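The "auto" ntile behavior described above can be illustrated in pure Scala: compute each feature's correlation statistic against the label, take the requested ntile of those statistics as a cutoff, and (for direction "greater") drop features above it. This is a simplified sketch with invented names, not the library's Pearson implementation:

```scala
object PearsonNTileSketch {

  // Pearson correlation coefficient between two equal-length numeric series
  def pearson(a: Seq[Double], b: Seq[Double]): Double = {
    val n = a.length
    val ma = a.sum / n
    val mb = b.sum / n
    val cov = a.zip(b).map { case (x, y) => (x - ma) * (y - mb) }.sum
    val sa = math.sqrt(a.map(x => (x - ma) * (x - ma)).sum)
    val sb = math.sqrt(b.map(y => (y - mb) * (y - mb)).sum)
    cov / (sa * sb)
  }

  // "auto" mode, direction "greater": drop features whose |corr to label|
  // lands above the given ntile of all features' correlation statistics
  def autoFilter(features: Map[String, Seq[Double]],
                 label: Seq[Double],
                 ntile: Double): Seq[String] = {
    val stats = features.map { case (k, v) => k -> math.abs(pearson(v, label)) }
    val sorted = stats.values.toSeq.sorted
    val cutoff = sorted(math.min((ntile * sorted.size).toInt, sorted.size - 1))
    stats.collect { case (k, s) if s <= cutoff => k }.toSeq.sorted
  }
}
```

A feature perfectly correlated with the label (a likely leak) lands above the median ntile and is culled, while weaker features survive.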
Setter
Controls which direction of correlation values to filter out. Allowable modes: "greater" or "lesser"
String: one of available modes
IllegalArgumentException
if the value provided is not in the available modes list.
Default: "greater"
Boolean switch for setting the state of pearsonFilterFlag
Boolean
Setter
Controls the Pearson manual filter value, if the PearsonFilterMode is set to "manual"
Double: A cut-off point; fields whose correlation statistic falls beyond this value will be culled from the feature vector.
With .setPearsonFilterMode("manual") and .setPearsonFilterDirection("greater"), fields that have a pearson correlation coefficient result above this value will be dropped from modeling runs.
Setter
Controls whether to use "auto" mode (using the PearsonAutoFilterNTile) or "manual" mode (using the PearsonFilterManualValue) to cull fields from the feature vector.
String: either "auto" or "manual"
IllegalArgumentException
if the value provided is not in the available modes list ("auto" and "manual")
Default: "auto"
Setter
Selection for the filter statistic to be used in Pearson Filtering. Available modes: "pvalue", "degreesFreedom", or "pearsonStat"
String: one of available modes.
IllegalArgumentException
if the value provided is not in the available modes list.
Default: "pearsonStat"
Boolean switch for setting the state of the scalingFlag
Boolean
Setter for determining the split caching strategy (either persist to disk for each kfold split or backing to Delta)
Configuration string: either 'persist' or 'delta'
0.7.1
Algorithm Config
Tuner Config
Setter for defining the secondary stopping criteria for continuous training mode (the number of consistently non-improving runs after which the learning algorithm terminates due to diminishing returns).
Negative Integer (an improvement over the prior best will reset the counter; each subsequent non-improvement will decrement a mutable counter. If the counter hits the limit specified in this value, the continuous mode algorithm will stop).
0.6.0
IllegalArgumentException
if the value is positive.
Setter for providing a path to write the kfold train/test splits as Delta data sets (useful for extremely large data sets, or situations where using local disk storage might be prohibitively expensive)
String path to a dbfs location for creating the temporary (or persisted) split data
0.7.1
Setter for whether or not to delete the written train/test splits for the run in Delta. Defaults to true, which means the job will delete the data on Object store to clean itself up after the run is completed if the splitCachingStrategy is set to 'delta'
Boolean: true => delete, false => leave on Object Store
0.7.1
Setter for defining the factor to be applied to the candidate listing of hyperparameters to generate through mutation for each generation other than the initial and post-modeling optimization phases. The larger this value (default: 10), the more potential space can be searched. There is not a large performance hit to this, and as such, values in excess of 100 are viable.
Int: a factor to multiply the numberOfMutationsPerGeneration by to generate a count of potential candidates.
0.6.0
IllegalArgumentException
if the value is not greater than zero.
Setter for selecting the type of Regressor to use for the within-epoch generation MBO of candidates
String: one of "XGBoost", "LinearRegression", or "RandomForest"
0.6.0
IllegalArgumentException
if the value is not supported
Setter for overriding the cardinality threshold exception threshold. [WARNING] Increasing this value on a sufficiently large data set could incur, during runtime, excessive memory and cpu pressure on the cluster.
Int: the limit above which an exception will be thrown for a classification problem wherein the label distinct count is too large to successfully generate synthetic data.
0.5.1
Default: 20
Setter for specifying the number of K-Groups to generate in the KMeans model
Int: number of k groups to generate
this
Setter for which distance measurement to use to calculate the nearness of vectors to a centroid
String: Options -> "euclidean" or "cosine". Default: "euclidean"
this
IllegalArgumentException
if an invalid value is entered
Setter for specifying the maximum number of iterations for the KMeans model to go through to converge
Int: Maximum limit on iterations
this
Setter for the internal KMeans column for cluster membership attribution
String: column name for internal algorithm column for group membership
this
Setter for a KMeans seed for the clustering algorithm
Long: Seed value
this
Setter for setting the tolerance for KMeans (must be > 0)
The tolerance value setting for KMeans
this
IllegalArgumentException
if a value less than 0 is entered
Reference: http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.ml.clustering.KMeans for further details.
Setter for configuring the number of Hash Tables to use for MinHashLSH
Int: Count of hash tables to use
this
See http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.ml.feature.MinHashLSH for more information
Setter for the internal LSH output hash information column
String: column name for the internal MinHashLSH Model transformation value
this
Setter for determining the label balance approach mode.
String: one of 'match', 'percentage', or 'target'
0.5.1
IllegalArgumentException
if the provided mode is not supported.
Default: "percentage"
Available modes:
'match': Will match all smaller class counts to the largest class count. [WARNING] May significantly increase memory pressure!
'percentage': Will adjust smaller classes to a percentage value of the largest class count.
'target': Will increase smaller class counts to a fixed numeric target of rows.
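The three balance modes above can be expressed as a small per-class target calculation. This is an illustrative sketch of the mode semantics (invented names; the toolkit's synthetic-row generation itself is KMeans/MinHashLSH-based and far more involved):

```scala
object LabelBalanceSketch {

  // Given per-class row counts, compute the post-balancing target count per
  // class for each of the three modes; classes are never shrunk.
  def targetCounts(counts: Map[String, Long],
                   mode: String,
                   percentage: Double = 0.2,
                   target: Long = 0L): Map[String, Long] = {
    val largest = counts.values.max
    counts.map { case (label, n) =>
      val goal = mode match {
        case "match"      => largest                       // grow to largest class
        case "percentage" => (largest * percentage).toLong // fraction of largest
        case "target"     => target                        // fixed row target
        case other =>
          throw new IllegalArgumentException(s"unsupported mode: $other")
      }
      label -> math.max(n, goal)
    }
  }
}
```

With counts {a: 100, b: 10}, 'match' grows b to 100, 'percentage' at 0.5 grows b to 50, and 'target' at 30 grows b to 30, while a is untouched in all modes.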
Setter for the minimum threshold of vector indexes to mutate within the feature vector.
The minimum (or fixed) number of indexes to mutate.
this
In vectorMutationMethod "fixed", this sets the fixed count of how many vector positions to mutate. In vectorMutationMethod "random", this sets the lower threshold for 'at least this many indexes will be mutated'.
Setter for the Mutation Mode of the feature vector's individual values
String: the mode to use.
this
IllegalArgumentException
if the mode is not supported.
Options:
"weighted" - uses weighted averaging to scale the euclidean distance between the centroid vector and mutation candidate vectors
"random" - randomly selects a position on the euclidean vector between the centroid vector and the candidate mutation vectors
"ratio" - uses a ratio between the values of the centroid vector and the mutation vector
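The "weighted" mode above can be sketched as linear interpolation between a candidate vector and its centroid, with the magnitude controlling how far the synthetic row moves toward the centroid. A simplified illustration with invented names, not the library's mutation code:

```scala
object MutationSketch {

  // "weighted"-style mutation sketch: interpolate each component of the
  // candidate toward the centroid. The higher the magnitude, the closer the
  // synthetic row lies to the centroid (see setMutationMagnitude's note).
  def weightedMutate(centroid: Vector[Double],
                     candidate: Vector[Double],
                     magnitude: Double): Vector[Double] = {
    require(magnitude > 0 && magnitude < 1,
      "mutation magnitude must be inside (0, 1)")
    centroid.zip(candidate).map { case (c, m) => m + (c - m) * magnitude }
  }
}
```

At magnitude 0.5 the synthetic point is the midpoint; at 0.9 it sits 90% of the way toward the centroid.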
Setter for specifying the mutation magnitude for the modes 'weighted' and 'ratio' in mutationMode
Double: value between 0 and 1 for mutation magnitude adjustment.
this
IllegalArgumentException
if the value specified is outside of the range (0, 1)
The higher this value, the closer the synthetic row data will be to the centroid vector vs. the candidate mutation vector.
Setter for specifying the percentage ratio for the mode 'percentage' in setLabelBalanceMode()
Double: A fractional double in the range of 0.0 to 1.0.
0.5.1
UnsupportedOperationException
if the provided value is outside of the range of 0.0 -> 1.0
Default: 0.2
Setting this value to 1.0 is equivalent to setting the label balance mode to 'match'
Setter for specifying the target row count to generate for 'target' mode in setLabelBalanceMode()
Int: The desired final number of rows per minority class label
0.5.1
[WARNING] Setting this value too high will greatly increase runtime and memory pressure.
Setter for how many vectors to find in adjacency to the centroid for generation of synthetic data
Int: Number of vectors to find nearest each centroid within the class
this
The higher the value set here, the higher the variance in synthetic data generation
Setter for setting the Synthetic column name
String: A column name that is uniquely not part of the main DataFrame
0.5.1
Setter for the Vector Mutation Method
String: the mode to use.
this
IllegalArgumentException
if the mode is not supported.
Options:
"fixed" - will use the value of minimumVectorCountToMutate to select random indexes of this number of indexes.
"random" - will use this number as a lower bound on a random selection of indexes between this and the vector length.
"all" - will mutate all of the vectors.
Boolean switch for setting the state of varianceFilterFlag
Boolean (whether or not to filter out fields from the feature vector that all have the same value)
Boolean switch for turning variance filtering off
Default: On
Boolean switch for turning variance filtering on
Default: On
Setter
The threshold value that is used to detect, based on the supplied labelCol, the cardinality of the label through a .distinct().count() being issued to the label column. Cardinality values above this setter's value will be considered a Regression Task; those below, a Classification Task.
Int: Threshold value for the labelCol cardinality check. Values above this setting will be determined to be a regression task; below, a classification task.
Default: 50
If an exception is thrown for incorrect type (a classifier was detected, but the intended usage is regression), lower this value. Conversely, if a classification problem has a number of classes above this setting's default threshold (50), increase this value.
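The detection rule above reduces to a distinct count compared against the threshold. A minimal sketch (illustrative names; the library performs this against the DataFrame's label column, optionally with approximate distinct counting):

```scala
object ModelTypeSketch {

  // Decide the modeling family from the label column's distinct count,
  // mirroring the cardinality threshold check (default 50)
  def detectModelType(labels: Seq[Double], cardinalitySwitch: Int = 50): String =
    if (labels.distinct.size > cardinalitySwitch) "regressor" else "classifier"
}
```

A binary 0/1 label resolves to a classification task, while a label with hundreds of distinct continuous values resolves to regression.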
Main Configuration Generator utility class, used for generating a modeling configuration to execute the autoML framework.
0.5