public abstract class BaseTextVectorizer extends Object implements TextVectorizer
| Modifier and Type | Field and Description |
|---|---|
| `protected VocabCache` | `cache` |
| `protected DocumentIterator` | `docIter` |
| `protected List<String>` | `labels` |
| `protected int` | `minWordFrequency` |
| `protected SentenceIterator` | `sentenceIterator` |
| `protected List<String>` | `stopWords` |
| `protected TokenizerFactory` | `tokenizerFactory` |
| `protected static akka.actor.ActorSystem` | `trainingSystem` |
| Modifier | Constructor and Description |
|---|---|
| | `BaseTextVectorizer()` |
| `protected` | `BaseTextVectorizer(VocabCache cache, TokenizerFactory tokenizerFactory, List<String> stopWords, int layerSize, int minWordFrequency, DocumentIterator docIter, SentenceIterator sentenceIterator, List<String> labels, InvertedIndex index)` |
| Modifier and Type | Method and Description |
|---|---|
| `void` | `fit()`<br>Train the model |
| `VocabCache` | `getCache()` |
| `DocumentIterator` | `getDocIter()` |
| `int` | `getLayerSize()` |
| `int` | `getMinWordFrequency()` |
| `SentenceIterator` | `getSentenceIterator()` |
| `List<String>` | `getStopWords()` |
| `TokenizerFactory` | `getTokenizerFactory()` |
| `InvertedIndex` | `index()`<br>Inverted index |
| `int` | `numWordsEncountered()`<br>Returns the number of words encountered so far |
| `void` | `setCache(VocabCache cache)` |
| `void` | `setDocIter(DocumentIterator docIter)` |
| `void` | `setLayerSize(int layerSize)` |
| `void` | `setMinWordFrequency(int minWordFrequency)` |
| `void` | `setSentenceIterator(SentenceIterator sentenceIterator)` |
| `void` | `setStopWords(List<String> stopWords)` |
| `void` | `setTokenizerFactory(TokenizerFactory tokenizerFactory)` |
| `VocabCache` | `vocab()`<br>The vocab sorted in descending order |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
transform, vectorize, vectorize, vectorize
vectorize
protected transient VocabCache cache
protected static akka.actor.ActorSystem trainingSystem
protected transient TokenizerFactory tokenizerFactory
protected int minWordFrequency
protected transient DocumentIterator docIter
protected transient SentenceIterator sentenceIterator
public BaseTextVectorizer()
protected BaseTextVectorizer(VocabCache cache, TokenizerFactory tokenizerFactory, List<String> stopWords, int layerSize, int minWordFrequency, DocumentIterator docIter, SentenceIterator sentenceIterator, List<String> labels, InvertedIndex index)
public void fit()
Description copied from interface: TextVectorizer
Train the model
Specified by: fit in interface TextVectorizer
public VocabCache vocab()
Description copied from interface: TextVectorizer
The vocab sorted in descending order
Specified by: vocab in interface TextVectorizer
public SentenceIterator getSentenceIterator()
public void setSentenceIterator(SentenceIterator sentenceIterator)
public DocumentIterator getDocIter()
public void setDocIter(DocumentIterator docIter)
public int getMinWordFrequency()
public void setMinWordFrequency(int minWordFrequency)
public int getLayerSize()
public void setLayerSize(int layerSize)
public TokenizerFactory getTokenizerFactory()
public void setTokenizerFactory(TokenizerFactory tokenizerFactory)
public VocabCache getCache()
public void setCache(VocabCache cache)
public int numWordsEncountered()
Description copied from interface: TextVectorizer
Returns the number of words encountered so far
Specified by: numWordsEncountered in interface TextVectorizer
public InvertedIndex index()
Description copied from interface: TextVectorizer
Inverted index
Specified by: index in interface TextVectorizer
Copyright © 2014. All rights reserved.