public class Word2Vec extends Object implements Persistable
Modifier and Type | Class and Description |
---|---|
static class |
Word2Vec.Builder |
Constructor and Description |
---|
Word2Vec() |
Modifier and Type | Method and Description |
---|---|
TreeSet<VocabWord> |
analogy(String word0,
String word1,
String word2) |
List<String> |
analogyWords(String w1,
String w2,
String w3)
Brings back a list of words that are analogous to the 3 words
presented in vector space
|
boolean |
buildVocab()
Builds the vocabulary for training
|
Set<VocabWord> |
distance(String word) |
void |
fit()
Train the model
|
VocabCache |
getCache() |
int |
getLayerSize() |
SentenceIterator |
getSentenceIter() |
List<String> |
getStopWords() |
TokenizerFactory |
getTokenizerFactory() |
int |
getWindow() |
double[] |
getWordVector(String word)
Get the word vector for a given word
|
org.nd4j.linalg.api.ndarray.INDArray |
getWordVectorMatrix(String word)
Get the word vector for a given word
|
org.nd4j.linalg.api.ndarray.INDArray |
getWordVectorMatrixNormalized(String word)
Returns the word vector divided by the norm2 of the array
|
boolean |
hasWord(String word)
Returns true if the model has this word in the vocab
|
int |
indexOf(String word) |
void |
iterate(VocabWord w1,
VocabWord w2,
AtomicLong nextRandom)
Train the word vector
on the given words
|
void |
load(InputStream is) |
void |
plotTsne()
Create a tsne plot
|
void |
setCache(VocabCache cache) |
void |
setLayerSize(int layerSize) |
void |
setSentenceIter(SentenceIterator sentenceIter)
Note that calling this setter
assumes that this is a training continuation
and therefore weights should not be reset.
|
void |
setTokenizerFactory(TokenizerFactory tokenizerFactory) |
void |
setup() |
double |
similarity(String word,
String word2)
Returns the similarity of 2 words
|
List<String> |
similarWordsInVocabTo(String word,
double accuracy)
Find all words with similar characters
in the vocab
|
void |
skipGram(int i,
List<VocabWord> sentence,
int b,
AtomicLong nextRandom)
Train via skip gram
|
void |
trainSentence(List<VocabWord> sentence,
AtomicInteger numWordsSoFar,
AtomicLong nextRandom)
Train on a list of vocab words
|
Collection<String> |
wordsNearest(String word,
int n)
Get the top n words most similar to the given word
|
void |
write(OutputStream os) |
public static final String UNK
public List<String> similarWordsInVocabTo(String word, double accuracy)
word
- the word to compare
accuracy
- the accuracy: 0 to 1
public int indexOf(String word)
public double[] getWordVector(String word)
word
- the word to get the matrix for
public org.nd4j.linalg.api.ndarray.INDArray getWordVectorMatrix(String word)
word
- the word to get the matrix for
public org.nd4j.linalg.api.ndarray.INDArray getWordVectorMatrixNormalized(String word)
word
- the word to get the matrix for
public Collection<String> wordsNearest(String word, int n)
word
- the word to compare
n
- the number of words to get
public List<String> analogyWords(String w1, String w2, String w3)
w1
-
w2
-
w3
-
public boolean hasWord(String word)
word
- the word to test for
public void fit()
public void setup()
public boolean buildVocab()
public void plotTsne()
public void trainSentence(List<VocabWord> sentence, AtomicInteger numWordsSoFar, AtomicLong nextRandom)
sentence
- the list of vocab words to train on
public void skipGram(int i, List<VocabWord> sentence, int b, AtomicLong nextRandom)
i
-
sentence
-
public void iterate(VocabWord w1, VocabWord w2, AtomicLong nextRandom)
w1
- the first word to fit
public double similarity(String word, String word2)
word
- the first word
word2
- the second word
public void write(OutputStream os)
write
in interface Persistable
public void load(InputStream is)
load
in interface Persistable
public void setSentenceIter(SentenceIterator sentenceIter)
sentenceIter
-
public int getLayerSize()
public void setLayerSize(int layerSize)
public int getWindow()
public SentenceIterator getSentenceIter()
public TokenizerFactory getTokenizerFactory()
public void setTokenizerFactory(TokenizerFactory tokenizerFactory)
public VocabCache getCache()
public void setCache(VocabCache cache)
Copyright © 2014. All rights reserved.