public class FastText extends Object implements WordVectors, Serializable
| Modifier and Type | Method and Description |
|---|---|
Map<String,Double> |
accuracy(List<String> questions)
Accuracy based on questions which are a space separated list of strings
where the first word is the query word, the next 2 words are negative,
and the last word is the predicted word to be nearest
|
void |
fit() |
int |
getContextWindowSize() |
int |
getDimension() |
int |
getEpoch() |
String |
getLabelPrefix() |
double |
getLearningRate() |
String |
getLossName() |
String |
getModelName() |
int |
getNegativesNumber() |
int |
getNumberOfBuckets() |
String |
getUNK() |
int |
getWordNgrams() |
double[] |
getWordVector(String word)
Get the word vector for a given word
|
org.nd4j.linalg.api.ndarray.INDArray |
getWordVectorMatrix(String word)
Get the word vector for a given word
|
org.nd4j.linalg.api.ndarray.INDArray |
getWordVectorMatrixNormalized(String word)
Returns the word vector divided by the norm2 of the array
|
org.nd4j.linalg.api.ndarray.INDArray |
getWordVectors(Collection<String> labels)
This method returns a 2D array, where each row represents the corresponding word/label
|
org.nd4j.linalg.api.ndarray.INDArray |
getWordVectorsMean(Collection<String> labels)
This method returns the mean vector, built from the words/labels passed in
|
boolean |
hasWord(String word)
Returns true if the model has this word in the vocab
|
int |
indexOf(String word) |
boolean |
jsonSerializable() |
void |
loadBinaryModel(String modelPath) |
void |
loadIterator() |
void |
loadPretrainedVectors(File vectorsFile) |
void |
loadWeightsInto(org.nd4j.linalg.api.ndarray.INDArray array) |
WeightLookupTable |
lookupTable()
Lookup table for the vectors
|
boolean |
outOfVocabularySupported()
Indicates whether the implementation vectorizes words absent from the vocabulary
|
String |
predict(String text) |
org.nd4j.common.primitives.Pair<String,Float> |
predictProbability(String text) |
void |
setModelUtils(ModelUtils utils)
Specifies ModelUtils to be used to access model
|
void |
setUNK(String input) |
double |
similarity(String word,
String word2)
Returns the similarity of 2 words
|
List<String> |
similarWordsInVocabTo(String word,
double accuracy)
Find all words with similar characters
in the vocab
|
void |
test(File testFile) |
void |
unloadBinaryModel() |
int |
vectorSize() |
VocabCache |
vocab()
Vocab for the vectors
|
long |
vocabSize() |
Collection<String> |
wordsNearest(Collection<String> positive,
Collection<String> negative,
int top)
Words nearest based on positive and negative words
|
Collection<String> |
wordsNearest(org.nd4j.linalg.api.ndarray.INDArray words,
int top) |
Collection<String> |
wordsNearest(String word,
int n)
Get the top n words most similar to the given word
|
Collection<String> |
wordsNearestSum(Collection<String> positive,
Collection<String> negative,
int top)
Words nearest based on positive and negative words
|
Collection<String> |
wordsNearestSum(org.nd4j.linalg.api.ndarray.INDArray words,
int top) |
Collection<String> |
wordsNearestSum(String word,
int n)
Get the top n words most similar to the given word
|
public FastText(File modelPath)
public FastText()
public void fit()
public void loadIterator()
public void loadPretrainedVectors(File vectorsFile)
public void loadBinaryModel(String modelPath)
public void unloadBinaryModel()
public void test(File testFile)
public org.nd4j.common.primitives.Pair<String,Float> predictProbability(String text)
public VocabCache vocab()
Description copied from interface: WordVectors
Specified by: vocab in interface WordVectors
public long vocabSize()
Specified by: vocabSize in interface org.deeplearning4j.nn.weights.embeddings.EmbeddingInitializer
public String getUNK()
Specified by: getUNK in interface WordVectors
public void setUNK(String input)
Specified by: setUNK in interface WordVectors
public double[] getWordVector(String word)
Description copied from interface: WordVectors
Specified by: getWordVector in interface WordVectors
Parameters:
word - the word to get the matrix for
public org.nd4j.linalg.api.ndarray.INDArray getWordVectorMatrixNormalized(String word)
Description copied from interface: WordVectors
Specified by: getWordVectorMatrixNormalized in interface WordVectors
Parameters:
word - the word to get the matrix for
public org.nd4j.linalg.api.ndarray.INDArray getWordVectorMatrix(String word)
Description copied from interface: WordVectors
Specified by: getWordVectorMatrix in interface WordVectors
Parameters:
word - the word to get the matrix for
public org.nd4j.linalg.api.ndarray.INDArray getWordVectors(Collection<String> labels)
Description copied from interface: WordVectors
Specified by: getWordVectors in interface WordVectors
public org.nd4j.linalg.api.ndarray.INDArray getWordVectorsMean(Collection<String> labels)
Description copied from interface: WordVectors
Specified by: getWordVectorsMean in interface WordVectors
public boolean hasWord(String word)
Description copied from interface: WordVectors
Specified by: hasWord in interface WordVectors
Parameters:
word - the word to test for
public Collection<String> wordsNearest(org.nd4j.linalg.api.ndarray.INDArray words, int top)
Specified by: wordsNearest in interface WordVectors
public Collection<String> wordsNearestSum(org.nd4j.linalg.api.ndarray.INDArray words, int top)
Specified by: wordsNearestSum in interface WordVectors
public Collection<String> wordsNearestSum(String word, int n)
Description copied from interface: WordVectors
Specified by: wordsNearestSum in interface WordVectors
Parameters:
word - the word to compare
n - the n to get
public Collection<String> wordsNearestSum(Collection<String> positive, Collection<String> negative, int top)
Description copied from interface: WordVectors
Specified by: wordsNearestSum in interface WordVectors
Parameters:
positive - the positive words
negative - the negative words
top - the top n words
public Map<String,Double> accuracy(List<String> questions)
Description copied from interface: WordVectors
Specified by: accuracy in interface WordVectors
Parameters:
questions - the questions to ask
public int indexOf(String word)
Specified by: indexOf in interface WordVectors
public List<String> similarWordsInVocabTo(String word, double accuracy)
Description copied from interface: WordVectors
Specified by: similarWordsInVocabTo in interface WordVectors
Parameters:
word - the word to compare
accuracy - the accuracy: 0 to 1
public Collection<String> wordsNearest(Collection<String> positive, Collection<String> negative, int top)
Description copied from interface: WordVectors
Specified by: wordsNearest in interface WordVectors
Parameters:
positive - the positive words
negative - the negative words
top - the top n words
public Collection<String> wordsNearest(String word, int n)
Description copied from interface: WordVectors
Specified by: wordsNearest in interface WordVectors
Parameters:
word - the word to compare
n - the n to get
public double similarity(String word, String word2)
Description copied from interface: WordVectors
Specified by: similarity in interface WordVectors
Parameters:
word - the first word
word2 - the second word
public WeightLookupTable lookupTable()
Description copied from interface: WordVectors
Specified by: lookupTable in interface WordVectors
public void setModelUtils(ModelUtils utils)
Description copied from interface: WordVectors
Specified by: setModelUtils in interface WordVectors
public void loadWeightsInto(org.nd4j.linalg.api.ndarray.INDArray array)
Specified by: loadWeightsInto in interface org.deeplearning4j.nn.weights.embeddings.EmbeddingInitializer
public int vectorSize()
Specified by: vectorSize in interface org.deeplearning4j.nn.weights.embeddings.EmbeddingInitializer
public boolean jsonSerializable()
Specified by: jsonSerializable in interface org.deeplearning4j.nn.weights.embeddings.EmbeddingInitializer
public double getLearningRate()
public int getDimension()
public int getContextWindowSize()
public int getEpoch()
public int getNegativesNumber()
public int getWordNgrams()
public String getLossName()
public String getModelName()
public int getNumberOfBuckets()
public String getLabelPrefix()
public boolean outOfVocabularySupported()
Description copied from interface: WordVectors
Specified by: outOfVocabularySupported in interface WordVectors
Copyright © 2022. All rights reserved.