public abstract class BaseTextVectorizer extends Object implements TextVectorizer
| Modifier and Type | Field and Description |
|---|---|
| `protected VocabCache` | `cache` |
| `protected DocumentIterator` | `docIter` |
| `protected List<String>` | `labels` |
| `protected int` | `minWordFrequency` |
| `protected SentenceIterator` | `sentenceIterator` |
| `protected List<String>` | `stopWords` |
| `protected TokenizerFactory` | `tokenizerFactory` |
| `protected static akka.actor.ActorSystem` | `trainingSystem` |
| Modifier | Constructor and Description |
|---|---|
| | `BaseTextVectorizer()` |
| `protected` | `BaseTextVectorizer(VocabCache cache, TokenizerFactory tokenizerFactory, List<String> stopWords, int layerSize, int minWordFrequency, DocumentIterator docIter, SentenceIterator sentenceIterator, List<String> labels, InvertedIndex index)` |
| Modifier and Type | Method and Description |
|---|---|
| `void` | `fit()`: Train the model. |
| `VocabCache` | `getCache()` |
| `DocumentIterator` | `getDocIter()` |
| `int` | `getLayerSize()` |
| `int` | `getMinWordFrequency()` |
| `SentenceIterator` | `getSentenceIterator()` |
| `List<String>` | `getStopWords()` |
| `TokenizerFactory` | `getTokenizerFactory()` |
| `InvertedIndex` | `index()`: Returns the inverted index. |
| `int` | `numWordsEncountered()`: Returns the number of words encountered so far. |
| `void` | `setCache(VocabCache cache)` |
| `void` | `setDocIter(DocumentIterator docIter)` |
| `void` | `setLayerSize(int layerSize)` |
| `void` | `setMinWordFrequency(int minWordFrequency)` |
| `void` | `setSentenceIterator(SentenceIterator sentenceIterator)` |
| `void` | `setStopWords(List<String> stopWords)` |
| `void` | `setTokenizerFactory(TokenizerFactory tokenizerFactory)` |
| `VocabCache` | `vocab()`: The vocab sorted in descending order. |
Methods inherited from class java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
transform, vectorize, vectorize, vectorize
vectorize
protected transient VocabCache cache
protected static akka.actor.ActorSystem trainingSystem
protected transient TokenizerFactory tokenizerFactory
protected int minWordFrequency
protected transient DocumentIterator docIter
protected transient SentenceIterator sentenceIterator
public BaseTextVectorizer()
protected BaseTextVectorizer(VocabCache cache, TokenizerFactory tokenizerFactory, List<String> stopWords, int layerSize, int minWordFrequency, DocumentIterator docIter, SentenceIterator sentenceIterator, List<String> labels, InvertedIndex index)
public void fit()
Description copied from interface: TextVectorizer
Train the model
Specified by: fit in interface TextVectorizer
public VocabCache vocab()
Description copied from interface: TextVectorizer
The vocab sorted in descending order
Specified by: vocab in interface TextVectorizer
public SentenceIterator getSentenceIterator()
public void setSentenceIterator(SentenceIterator sentenceIterator)
public DocumentIterator getDocIter()
public void setDocIter(DocumentIterator docIter)
public int getMinWordFrequency()
public void setMinWordFrequency(int minWordFrequency)
public int getLayerSize()
public void setLayerSize(int layerSize)
public TokenizerFactory getTokenizerFactory()
public void setTokenizerFactory(TokenizerFactory tokenizerFactory)
public VocabCache getCache()
public void setCache(VocabCache cache)
public int numWordsEncountered()
Description copied from interface: TextVectorizer
Returns the number of words encountered so far
Specified by: numWordsEncountered in interface TextVectorizer
public InvertedIndex index()
Description copied from interface: TextVectorizer
Inverted index
Specified by: index in interface TextVectorizer
Copyright © 2014. All rights reserved.