public class StringColumn extends AbstractColumn implements CategoricalColumn, StringFilters, StringMapFunctions, StringReduceUtils
Because the MISSING_VALUE for this column type is an empty string, there is little or no need for special handling of missing values in this class's methods.
Modifier and Type | Field and Description |
---|---|
static String |
MISSING_VALUE |
DEFAULT_ARRAY_SIZE
Modifier and Type | Method and Description |
---|---|
StringColumn |
addAll(List<String> stringValues)
Add all the strings in the list to this column
|
void |
append(Column column) |
StringColumn |
append(String value)
Added for naming consistency with all other columns
|
StringColumn |
appendCell(String object) |
StringColumn |
appendMissing()
Appends a missing value appropriate to the column
|
byte[] |
asBytes(int rowNumber)
Returns the contents of the cell at rowNumber as a byte[]
|
double[] |
asDoubleArray() |
int[] |
asIntArray() |
List<String> |
asList()
Returns a List<String> representation of all the values in this column
|
NumberColumn |
asNumberColumn() |
Set<String> |
asSet() |
List<String> |
bottom(int n)
Returns the smallest ("bottom") n values in the column
|
int |
byteSize()
Returns the width of a cell in this column, in bytes.
|
void |
clear() |
boolean |
contains(String aString)
Returns true if this column contains a cell with the given string, and false otherwise
|
static String |
convert(String stringValue) |
StringColumn |
copy()
Returns a deep copy of the receiver
|
Table |
countByCategory() |
int |
countMissing()
Returns the count of missing values in this column
|
double |
countOccurrences(String value) |
int |
countUnique()
Returns the count of unique values in this column.
|
static StringColumn |
create(String name) |
static StringColumn |
create(String name,
int size) |
static StringColumn |
create(String name,
List<String> strings) |
static StringColumn |
create(String name,
String[] strings) |
it.unimi.dsi.fastutil.ints.IntArrayList |
data()
Returns the integers that back this column.
|
StringColumn |
emptyCopy()
Returns a copy of the receiver with no data.
|
StringColumn |
emptyCopy(int rowSize)
Returns an empty copy of the receiver, with its internal storage initialized to the given row size.
|
int |
firstIndexOf(String value) |
String |
get(int rowIndex)
Returns the value at rowIndex in this column.
|
double |
getDouble(int i)
Returns a double representation of the value at the given row.
|
List<BooleanColumn> |
getDummies()
Returns a list of boolean columns suitable for use as dummy variables in, for example, regression analysis,
where a column of categorical data must be encoded as a list of columns, such that each column represents
a single category and indicates whether it is present (1) or not present (0)
|
StringColumnFormatter |
getPrintFormatter() |
String |
getString(int row)
Returns a string representation of the value at the given row.
|
String |
getUnformattedString(int row) |
it.unimi.dsi.fastutil.ints.IntArrayList |
getValues(it.unimi.dsi.fastutil.ints.IntArrayList indexes)
Returns all the values associated with the given indexes.
|
void |
initializeWith(it.unimi.dsi.fastutil.ints.IntArrayList list,
StringColumn old)
Initializes this Column with the given values for performance
|
boolean |
isEmpty()
Returns true if the column has no data
|
Selection |
isEqualTo(String string) |
Selection |
isIn(String... strings) |
boolean |
isMissing(int rowNumber) |
Selection |
isNotEqualTo(String string) |
Selection |
isNotIn(String... strings) |
Iterator<String> |
iterator() |
it.unimi.dsi.fastutil.ints.Int2ObjectMap<String> |
keyToValueMap() |
StringColumn |
lag(int n)
Returns a column of the same type and size as the receiver, containing the receiver's values offset by n.
|
StringColumn |
lead(int n)
Returns a column of the same type as the receiver, containing the receiver's values offset by -n.
For example if you lead a column containing 2, 3, 4 by 1, you get a column containing 3, 4, NA.
|
StringColumn |
removeMissing() |
it.unimi.dsi.fastutil.ints.IntComparator |
rowComparator() |
StringColumn |
set(int rowIndex,
String stringValue) |
StringColumn |
set(Selection rowSelection,
String newValue)
Conditionally update this column, replacing current values with newValue for all rows where the current value
matches the selection criteria
|
void |
setPrintFormatter(StringColumnFormatter formatter) |
int |
size()
Returns the number of elements (a.k.a. rows or cells) in the column.
|
void |
sortAscending() |
void |
sortDescending() |
Table |
summary() |
List<String> |
top(int n)
Returns the largest ("top") n values in the column
|
ColumnType |
type()
Returns this column's ColumnType
|
StringColumn |
unique()
Returns a new Column containing all the unique values in this column
|
static boolean |
valueIsMissing(String string) |
it.unimi.dsi.fastutil.ints.IntArrayList |
values()
Returns the integer encoded value of each cell in this column.
|
StringColumn |
where(Selection selection) |
columnWidth, name, print, setName, toString
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
containsString, endsWith, equalsIgnoreCase, equalsIgnoreCase, eval, eval, eval, eval, isAlpha, isAlphaNumeric, isEmptyString, isEqualTo, isIn, isLongerThan, isLowerCase, isMissing, isNotEqualTo, isNotIn, isNotMissing, isNumeric, isShorterThan, isUpperCase, lengthEquals, matchesRegex, startsWith, startsWith
abbreviate, commonPrefix, commonSuffix, concatenate, countTokens, distance, format, join, length, lowerCase, padEnd, padStart, replaceAll, replaceAll, replaceFirst, substring, substring, tokenizeAndRemoveDuplicates, tokenizeAndSort, tokenizeAndSort, tokens, trim, uniqueTokens, upperCase
appendAll, appendAll
columnWidth, create, first, inRange, last, name, print, rolling, rows, sampleN, sampleX, setName, subset, summarizeIf, title
forEach, spliterator
public static final String MISSING_VALUE
public static boolean valueIsMissing(String string)
public StringColumn appendMissing()
Column
appendMissing
in interface Column
public static StringColumn create(String name)
public static StringColumn create(String name, String[] strings)
public static StringColumn create(String name, List<String> strings)
public static StringColumn create(String name, int size)
public ColumnType type()
Column
type
in interface Column
type
in class AbstractColumn
ColumnType
public void setPrintFormatter(StringColumnFormatter formatter)
public StringColumnFormatter getPrintFormatter()
public String getString(int row)
Column
public String getUnformattedString(int row)
getUnformattedString
in interface Column
public StringColumn emptyCopy()
Column
public StringColumn emptyCopy(int rowSize)
Column
public void sortAscending()
sortAscending
in interface Column
public void sortDescending()
sortDescending
in interface Column
public int size()
public String get(int rowIndex)
get
in interface StringFilters
rowIndex
- index of the row
IndexOutOfBoundsException
- if the given rowIndex is not in the column
public List<String> asList()
NOTE: Unless you really need a string consider using the column itself for large datasets as it uses much less memory
public Table countByCategory()
countByCategory
in interface CategoricalColumn
public int[] asIntArray()
asIntArray
in interface CategoricalColumn
public StringColumn lead(int n)
Column
public StringColumn lag(int n)
Column
For example if you lag a column containing 2, 3, 4 by 1, you get a column containing NA, 2, 3
public StringColumn set(Selection rowSelection, String newValue)
Examples:
myCatColumn.set(myCatColumn.isEqualTo("Cat"), "Dog"); // no more cats
myCatColumn.set(myCatColumn.isMissing(), "Fox"); // no more missing values
public StringColumn set(int rowIndex, String stringValue)
public int countUnique()
Column
countUnique
in interface Column
public List<String> top(int n)
n
- The maximum number of records to return. The actual number will be smaller if n is greater than the
number of observations in the column
public List<String> bottom(int n)
n
- The maximum number of records to return. The actual number will be smaller if n is greater than the
number of observations in the column
public void initializeWith(it.unimi.dsi.fastutil.ints.IntArrayList list, StringColumn old)
public boolean contains(String aString)
aString
- the value to look for
public it.unimi.dsi.fastutil.ints.IntArrayList getValues(it.unimi.dsi.fastutil.ints.IntArrayList indexes)
indexes
- the indexes
IntArrayList
public StringColumn addAll(List<String> stringValues)
stringValues
- a list of values
public StringColumn appendCell(String object)
appendCell
in interface Column
appendCell
in class AbstractColumn
public it.unimi.dsi.fastutil.ints.IntComparator rowComparator()
rowComparator
in interface Column
public boolean isEmpty()
Column
public Selection isEqualTo(String string)
isEqualTo
in interface StringFilters
public List<BooleanColumn> getDummies()
BooleanColumn
public StringColumn unique()
public it.unimi.dsi.fastutil.ints.IntArrayList data()
IntArrayList
public NumberColumn asNumberColumn()
public StringColumn where(Selection selection)
public StringColumn copy()
Column
public int countMissing()
countMissing
in interface Column
public StringColumn removeMissing()
removeMissing
in interface Column
public it.unimi.dsi.fastutil.ints.IntArrayList values()
IntArrayList
public int byteSize()
Column
public byte[] asBytes(int rowNumber)
public double getDouble(int i)
Column
public double[] asDoubleArray()
asDoubleArray
in interface Column
public StringColumn append(String value)
public Selection isIn(String... strings)
isIn
in interface StringFilters
public Selection isNotIn(String... strings)
isNotIn
in interface StringFilters
public it.unimi.dsi.fastutil.ints.Int2ObjectMap<String> keyToValueMap()
public int firstIndexOf(String value)
public double countOccurrences(String value)
Copyright © 2018. All rights reserved.