public class DataSet extends Pair<org.jblas.DoubleMatrix,org.jblas.DoubleMatrix> implements Persistable, Iterable<DataSet>
Pair.DefaultLexicographicPairComparator<F extends Comparable<F>,S extends Comparable<S>>, Pair.FirstComparator<S extends Comparable<? super S>,T>, Pair.LexicographicPairComparator<F,S>, Pair.ReverseFirstComparator<S extends Comparable<? super S>,T>, Pair.ReverseSecondComparator<S,T extends Comparable<? super T>>, Pair.SecondComparator<S,T extends Comparable<? super T>>
Constructor and Description |
---|
DataSet(org.jblas.DoubleMatrix first,
org.jblas.DoubleMatrix second) |
DataSet(Pair<org.jblas.DoubleMatrix,org.jblas.DoubleMatrix> pair) |
Modifier and Type | Method and Description |
---|---|
void |
addRow(DataSet d,
int i) |
List<DataSet> |
asList() |
List<List<DataSet>> |
batchBy(int num) |
List<List<DataSet>> |
batchByNumLabels() |
DataSet |
copy() |
List<DataSet> |
dataSetBatches(int num) |
static DataSet |
empty() |
org.jblas.DoubleMatrix |
exampleMaxs() |
org.jblas.DoubleMatrix |
exampleMeans() |
org.jblas.DoubleMatrix |
exampleSums() |
DataSet |
get(int i) |
Iterator<DataSet> |
iterator() |
DataSetIterator |
iterator(int batches) |
static DataSet |
load(File path) |
void |
load(InputStream is) |
static void |
main(String[] args) |
static DataSet |
merge(List<DataSet> data) |
int |
numExamples() |
int |
numInputs() |
int |
numOutcomes() |
int |
outcome() |
void |
roundToTheNearest(int roundTo) |
DataSet |
sample(int numSamples)
Sample without replacement, using a random rng
|
DataSet |
sample(int numSamples,
boolean withReplacement)
Sample a dataset numSamples times
|
DataSet |
sample(int numSamples,
org.apache.commons.math3.random.RandomGenerator rng)
Sample without replacement
|
DataSet |
sample(int numSamples,
org.apache.commons.math3.random.RandomGenerator rng,
boolean withReplacement)
Sample a dataset
|
void |
saveTo(File file,
boolean binary) |
List<List<DataSet>> |
sortAndBatchByNumLabels()
Sorts the dataset by label:
Splits the data set such that examples are sorted by their labels.
|
void |
sortByLabel()
Organizes the dataset to minimize sampling error
while still allowing efficient batching.
|
Pair<DataSet,DataSet> |
splitTestAndTrain(int numHoldout) |
String |
toString() |
void |
validate() |
void |
write(OutputStream os) |
public DataSet(Pair<org.jblas.DoubleMatrix,org.jblas.DoubleMatrix> pair)
public DataSet(org.jblas.DoubleMatrix first, org.jblas.DoubleMatrix second)
public DataSetIterator iterator(int batches)
public DataSet copy()
public static DataSet empty()
public int numInputs()
public void validate()
public int outcome()
public DataSet get(int i)
public List<List<DataSet>> sortAndBatchByNumLabels()
public void sortByLabel()
public void addRow(DataSet d, int i)
public org.jblas.DoubleMatrix exampleSums()
public org.jblas.DoubleMatrix exampleMaxs()
public org.jblas.DoubleMatrix exampleMeans()
public void saveTo(File file, boolean binary) throws IOException
IOException
public static DataSet load(File path) throws IOException
IOException
public DataSet sample(int numSamples)
numSamples
- the number of samples to get
public DataSet sample(int numSamples, org.apache.commons.math3.random.RandomGenerator rng)
numSamples
- the number of samples to get
rng
- the rng to use
public DataSet sample(int numSamples, boolean withReplacement)
numSamples
- the number of samples to get
withReplacement
- whether to allow duplicates (only tracked by example row number)
public DataSet sample(int numSamples, org.apache.commons.math3.random.RandomGenerator rng, boolean withReplacement)
numSamples
- the number of samples to get
rng
- the rng to use
withReplacement
- whether to allow duplicates (only tracked by example row number)
public void roundToTheNearest(int roundTo)
public int numOutcomes()
public int numExamples()
public String toString()
public static void main(String[] args) throws IOException
IOException
public void write(OutputStream os)
write
in interface Persistable
public void load(InputStream is)
load
in interface Persistable
Copyright © 2014. All Rights Reserved.