com
.
databricks
.
labs
.
automl
.
sanitize
OutlierFiltering
Related Doc:
package sanitize
class
OutlierFiltering
extends
SparkSessionWrapper
with
DataValidation
Linear Supertypes
DataValidation
,
SparkSessionWrapper
,
Serializable
,
Serializable
,
AnyRef
,
Any
Ordering
Alphabetic
By Inheritance
Inherited
OutlierFiltering
DataValidation
SparkSessionWrapper
Serializable
Serializable
AnyRef
Any
Hide All
Show All
Visibility
Public
All
Instance Constructors
new
OutlierFiltering
(
df:
DataFrame
)
df
- Input DataFrame, prior to feature vectorization
Value Members
final
def
!=
(
arg0:
Any
)
:
Boolean
Definition Classes
AnyRef → Any
final
def
##
()
:
Int
Definition Classes
AnyRef → Any
final
def
==
(
arg0:
Any
)
:
Boolean
Definition Classes
AnyRef → Any
def
_allowableCardinalilties
:
List
[
String
]
Definition Classes
DataValidation
def
_allowableCategoricalFilterModes
:
List
[
String
]
Definition Classes
DataValidation
def
_allowableDateTimeConversions
:
List
[
String
]
Definition Classes
DataValidation
final
def
asInstanceOf
[
T0
]
:
T0
Definition Classes
Any
def
clone
()
:
AnyRef
Attributes
protected[
java.lang
]
Definition Classes
AnyRef
Annotations
@throws
(
...
)
def
convertDateAndTime
(
df:
DataFrame
,
dateFields:
List
[
String
]
,
timeFields:
List
[
String
]
,
mode:
String
)
: (
DataFrame
,
List
[
String
])
Definition Classes
DataValidation
final
def
eq
(
arg0:
AnyRef
)
:
Boolean
Definition Classes
AnyRef
def
equals
(
arg0:
Any
)
:
Boolean
Definition Classes
AnyRef → Any
def
filterContinuousOutliers
(
manualFilter:
List
[
ManualFilters
]
,
vectorIgnoreList:
Array
[
String
]
)
: (
DataFrame
,
DataFrame
,
Map
[
String
, (
Double
,
String
)])
def
filterContinuousOutliers
(
vectorIgnoreList:
Array
[
String
]
,
ignoreList:
Array
[
String
] =
Array.empty[String]
)
: (
DataFrame
,
DataFrame
,
Map
[
String
, (
Double
,
String
)])
def
finalize
()
:
Unit
Attributes
protected[
java.lang
]
Definition Classes
AnyRef
Annotations
@throws
(
classOf[java.lang.Throwable]
)
def
generateAssembly
(
numericColumns:
List
[
String
]
,
characterColumns:
List
[
String
]
,
featureCol:
String
)
: (
Array
[
StringIndexer
],
Array
[
String
],
VectorAssembler
)
Definition Classes
DataValidation
final
def
getClass
()
:
Class
[_]
Definition Classes
AnyRef → Any
def
getContinuousDataThreshold
:
Int
def
getFilterBounds
:
String
def
getFilterPrecision
:
Double
def
getLabelCol
:
String
def
getLowerFilterNTile
:
Double
def
getParallelism
:
Int
def
getUpperFilterNTile
:
Double
def
hashCode
()
:
Int
Definition Classes
AnyRef → Any
def
indexStrings
(
categoricalFields:
List
[
String
]
)
: (
Array
[
StringIndexer
],
Array
[
String
])
Definition Classes
DataValidation
def
invalidateSelection
(
value:
String
,
allowances:
Seq
[
String
]
)
:
String
Definition Classes
DataValidation
final
def
isInstanceOf
[
T0
]
:
Boolean
Definition Classes
Any
final
def
ne
(
arg0:
AnyRef
)
:
Boolean
Definition Classes
AnyRef
final
def
notify
()
:
Unit
Definition Classes
AnyRef
final
def
notifyAll
()
:
Unit
Definition Classes
AnyRef
def
oneHotEncodeStrings
(
stringIndexedFields:
List
[
String
]
)
: (
OneHotEncoderEstimator
,
Array
[
String
])
Definition Classes
DataValidation
lazy val
sc
:
SparkContext
Definition Classes
SparkSessionWrapper
def
setContinuousDataThreshold
(
value:
Int
)
:
OutlierFiltering
.this.type
def
setFilterBounds
(
value:
String
)
:
OutlierFiltering
.this.type
def
setFilterPrecision
(
value:
Double
)
:
OutlierFiltering
.this.type
def
setLabelCol
(
value:
String
)
:
OutlierFiltering
.this.type
def
setLowerFilterNTile
(
value:
Double
)
:
OutlierFiltering
.this.type
def
setParallelism
(
value:
Int
)
:
OutlierFiltering
.this.type
def
setUpperFilterNTile
(
value:
Double
)
:
OutlierFiltering
.this.type
lazy val
spark
:
SparkSession
Definition Classes
SparkSessionWrapper
final
def
synchronized
[
T0
]
(
arg0: ⇒
T0
)
:
T0
Definition Classes
AnyRef
def
toString
()
:
String
Definition Classes
AnyRef → Any
def
validateCardinality
(
df:
DataFrame
,
stringFields:
List
[
String
]
,
cardinalityLimit:
Int
=
500
,
parallelism:
Int
=
20
)
:
ValidatedCategoricalFields
Definition Classes
DataValidation
def
validateFieldPresence
(
df:
DataFrame
,
column:
String
)
:
Unit
Definition Classes
DataValidation
def
validateInputDataframe
(
df:
DataFrame
)
:
Unit
Definition Classes
DataValidation
def
validateLabelAndFeatures
(
df:
DataFrame
,
labelCol:
String
,
featureCol:
String
)
:
Unit
Definition Classes
DataValidation
final
def
wait
()
:
Unit
Definition Classes
AnyRef
Annotations
@throws
(
...
)
final
def
wait
(
arg0:
Long
,
arg1:
Int
)
:
Unit
Definition Classes
AnyRef
Annotations
@throws
(
...
)
final
def
wait
(
arg0:
Long
)
:
Unit
Definition Classes
AnyRef
Annotations
@throws
(
...
)
Inherited from
DataValidation
Inherited from
SparkSessionWrapper
Inherited from
Serializable
Inherited from
Serializable
Inherited from
AnyRef
Inherited from
Any
Ungrouped