Preprocess

Type Members

type PreprocessedData = Array[T]

Definition Classes
Preprocessing
abstract type T

Definition Classes
Preprocessing

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def getColumnNames(path: String, header: Int = 1, separator: String = ","): Array[String]

Get the column names of a data set.
Get the column names of a data set. Assumes the names are placed in the first line and separated by a comma.
path
Path of the file in the system.
header
Number of lines to discard (header), by default 1.
separator
Separator used, by default, comma.
returns
An array of strings, where each string is a column name. Names are in the original order.

Note
This is quick and dirty, open normally by keeping the class and only keep the last column
def getColumnNamesMap(path: String, header: Int = 1, separator: String = ","): Map[Int, String]

Get the column names of a data set in a map, assigning the position index (integer) to the corresponding name (string)
Get the column names of a data set in a map, assigning the position index (integer) to the corresponding name (string)
path
Path of the file in the system.
header
Number of lines to discard (header), by default 1.
separator
Separator used, by default, comma.
returns
A map of integers to strings, where each key is a column's position index and each value is the corresponding column name.

Note
This is quick and dirty, open normally by keeping the class and only keep the last column
def getLabels(path: String, header: Int = 1, separator: String = ",", excludeIndex: Boolean = false): Array[Boolean]

Get the last column of a data file, assuming it is the class column and that it is numerical, or even binary
Get the last column of a data file, assuming it is the class column and that it is numerical, or even binary
path
Path of the file in the system.
header
Number of lines to discard (header), by default 1.
separator
Separator used, by default, comma.
excludeIndex
Whether to exclude an index (the first column) or not.
returns
The "class" column, as an Array of Boolean

Note
This is quick and dirty: it opens the file normally, keeping the class, and then only keeps the last column
def hashCode(): Int

Definition Classes
AnyRef → Any
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
def ksRank(input: Array[Array[Double]], parallelize: Int = 0): Array[Array[(Int, Float)]]

Return the rank index structure (as in HiCS).
Return the rank index structure (as in HiCS).
Note that the numbers might be different in the case of ties, in comparison with other implementations.
input
A 2-D Array of Double (data set).
returns
A 2-D Array of 2-D Tuple, where the first element is the original index, the second is its value (actually not used for the KSP test)
def ksRankSimple(input: Array[Array[Double]], parallelize: Int = 0): Array[Array[Int]]

Return the rank index structure (as in HiCS).
Return the rank index structure (as in HiCS).
Note that the numbers might be different in the case of ties, in comparison with other implementations.
input
A 2-D Array of Double (data set, column-oriented).
returns
A 2-D Array of Int, where the element is the original index in the unsorted data set
def mwRank(input: Array[Array[Double]], parallelize: Int): Array[Array[(Int, Float)]]

Return the rank index structure for MWP, with adjusted ranks but no correction for ties.
Return the rank index structure for MWP, with adjusted ranks but no correction for ties.
input
A 2-D Array of Double (data set, column-oriented).
returns
A 2-D Array of 2-D Tuple, where the first element is the original index, the second is its rank.
def mwRankCorrectionCumulative(input: Array[Array[Double]], parallelize: Int): Array[Array[(Int, Float, Double)]]

Return the rank index structure for MWP, with adjusted ranks AND correction for ties.
Return the rank index structure for MWP, with adjusted ranks AND correction for ties.
input
A 2-D Array of Double (data set, column-oriented).
returns
A 2-D Array of 3-D Tuple, where the first element is the original index, the second is its rank and the last one is a cumulative correction for ties.
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def open(path: String, header: Int = 1, separator: String = ",", excludeIndex: Boolean = false, dropClass: Boolean = true, sample1000: Boolean = false): Array[Array[Double]]

Helper function that redirects to openArff if an arff file is given, otherwise to openCSV
Helper function that redirects to openArff if an arff file is given, otherwise to openCSV
returns
A data set (row oriented)
def openArff(path: String, dropClass: Boolean = true, max1000: Boolean = false): Array[Array[Double]]

Open an Arff file as a 2-D Array of Double
Open an Arff file as a 2-D Array of Double
path
Path to the file in the current filesystem
dropClass
Whether to drop the "class" column if there is one
max1000
Cap the opened data to 1000 rows. If the original data has more rows, sample 1000 without replacement
returns
A 2-D Array of Double containing the values for each numerical column (row-oriented)

Note
This method is inspired from the work of Fabian Keller
def openCSV(path: String, header: Int = 1, separator: String = ",", excludeIndex: Boolean = false, dropClass: Boolean = true, max1000: Boolean = false): Array[Array[Double]]

Open a csv file at a specified path.
Open a csv file at a specified path. Currently, only handles numerical values.
path
Path of the file in the system.
header
Number of lines to discard (header), by default 1.
separator
Separator used, by default, comma.
excludeIndex
Whether to exclude an index (the first column) or not.
dropClass
Whether to drop the "class" column if there is one. (assumes it is the last one)
max1000
Cap the opened data to 1000 rows. If the original data has more rows, sample 1000 without replacement
returns
A 2-D Array of Double containing the values from the csv. (row-oriented)
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
AnyRef → Any
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

Related Doc: package preprocess

object Preprocess extends Preprocessing

Type Members

type PreprocessedData = Array[T]

abstract type T

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: Any): Boolean

final def asInstanceOf[T0]: T0

def clone(): AnyRef

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def finalize(): Unit

final def getClass(): Class[_]

def getColumnNames(path: String, header: Int = 1, separator: String = ","): Array[String]

def getColumnNamesMap(path: String, header: Int = 1, separator: String = ","): Map[Int, String]

def getLabels(path: String, header: Int = 1, separator: String = ",", excludeIndex: Boolean = false): Array[Boolean]

def hashCode(): Int

final def isInstanceOf[T0]: Boolean

def ksRank(input: Array[Array[Double]], parallelize: Int = 0): Array[Array[(Int, Float)]]

def ksRankSimple(input: Array[Array[Double]], parallelize: Int = 0): Array[Array[Int]]

def mwRank(input: Array[Array[Double]], parallelize: Int): Array[Array[(Int, Float)]]

def mwRankCorrectionCumulative(input: Array[Array[Double]], parallelize: Int): Array[Array[(Int, Float, Double)]]

final def ne(arg0: AnyRef): Boolean

final def notify(): Unit

final def notifyAll(): Unit

def open(path: String, header: Int = 1, separator: String = ",", excludeIndex: Boolean = false, dropClass: Boolean = true, sample1000: Boolean = false): Array[Array[Double]]

def openArff(path: String, dropClass: Boolean = true, max1000: Boolean = false): Array[Array[Double]]

def openCSV(path: String, header: Int = 1, separator: String = ",", excludeIndex: Boolean = false, dropClass: Boolean = true, max1000: Boolean = false): Array[Array[Double]]

final def synchronized[T0](arg0: ⇒ T0): T0

def toString(): String

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from Preprocessing

Inherited from AnyRef

Inherited from Any

Ungrouped