public class FastText extends Object implements WordVectors, Serializable
Modifier and Type | Method and Description |
---|---|
Map<String,Double> |
accuracy(List<String> questions)
Accuracy based on questions which are a space separated list of strings
where the first word is the query word, the next 2 words are negative,
and the last word is the predicted word to be nearest
|
void |
fit() |
int |
getContextWindowSize() |
int |
getDimension() |
int |
getEpoch() |
String |
getLabelPrefix() |
double |
getLearningRate() |
String |
getLossName() |
String |
getModelName() |
int |
getNegativesNumber() |
int |
getNumberOfBuckets() |
String |
getUNK() |
int |
getWordNgrams() |
double[] |
getWordVector(String word)
Get the word vector for a given word
|
org.nd4j.linalg.api.ndarray.INDArray |
getWordVectorMatrix(String word)
Get the word vector for a given word
|
org.nd4j.linalg.api.ndarray.INDArray |
getWordVectorMatrixNormalized(String word)
Returns the word vector divided by the norm2 of the array
|
org.nd4j.linalg.api.ndarray.INDArray |
getWordVectors(Collection<String> labels)
This method returns a 2D array, where each row represents the corresponding word/label
|
org.nd4j.linalg.api.ndarray.INDArray |
getWordVectorsMean(Collection<String> labels)
This method returns the mean vector, built from the words/labels passed in
|
boolean |
hasWord(String word)
Returns true if the model has this word in the vocab
|
int |
indexOf(String word) |
boolean |
jsonSerializable() |
void |
loadBinaryModel(String modelPath) |
void |
loadIterator() |
void |
loadPretrainedVectors(File vectorsFile) |
void |
loadWeightsInto(org.nd4j.linalg.api.ndarray.INDArray array) |
WeightLookupTable |
lookupTable()
Lookup table for the vectors
|
boolean |
outOfVocabularySupported()
Indicates whether the implementation vectorizes words absent from the vocabulary
|
String |
predict(String text) |
org.nd4j.common.primitives.Pair<String,Float> |
predictProbability(String text) |
void |
setModelUtils(ModelUtils utils)
Specifies ModelUtils to be used to access model
|
void |
setUNK(String input) |
double |
similarity(String word,
String word2)
Returns the similarity of 2 words
|
List<String> |
similarWordsInVocabTo(String word,
double accuracy)
Find all words with similar characters
in the vocab
|
void |
test(File testFile) |
void |
unloadBinaryModel() |
int |
vectorSize() |
VocabCache |
vocab()
Vocab for the vectors
|
long |
vocabSize() |
Collection<String> |
wordsNearest(Collection<String> positive,
Collection<String> negative,
int top)
Words nearest based on positive and negative words
|
Collection<String> |
wordsNearest(org.nd4j.linalg.api.ndarray.INDArray words,
int top) |
Collection<String> |
wordsNearest(String word,
int n)
Get the top n words most similar to the given word
|
Collection<String> |
wordsNearestSum(Collection<String> positive,
Collection<String> negative,
int top)
Words nearest based on positive and negative words
|
Collection<String> |
wordsNearestSum(org.nd4j.linalg.api.ndarray.INDArray words,
int top) |
Collection<String> |
wordsNearestSum(String word,
int n)
Get the top n words most similar to the given word
|
public FastText(File modelPath)
public FastText()
public void fit()
public void loadIterator()
public void loadPretrainedVectors(File vectorsFile)
public void loadBinaryModel(String modelPath)
public void unloadBinaryModel()
public void test(File testFile)
public org.nd4j.common.primitives.Pair<String,Float> predictProbability(String text)
public VocabCache vocab()
WordVectors
vocab
in interface WordVectors
public long vocabSize()
vocabSize
in interface org.deeplearning4j.nn.weights.embeddings.EmbeddingInitializer
public String getUNK()
getUNK
in interface WordVectors
public void setUNK(String input)
setUNK
in interface WordVectors
public double[] getWordVector(String word)
WordVectors
getWordVector
in interface WordVectors
word
- the word to get the matrix for
public org.nd4j.linalg.api.ndarray.INDArray getWordVectorMatrixNormalized(String word)
WordVectors
getWordVectorMatrixNormalized
in interface WordVectors
word
- the word to get the matrix for
public org.nd4j.linalg.api.ndarray.INDArray getWordVectorMatrix(String word)
WordVectors
getWordVectorMatrix
in interface WordVectors
word
- the word to get the matrix for
public org.nd4j.linalg.api.ndarray.INDArray getWordVectors(Collection<String> labels)
WordVectors
getWordVectors
in interface WordVectors
public org.nd4j.linalg.api.ndarray.INDArray getWordVectorsMean(Collection<String> labels)
WordVectors
getWordVectorsMean
in interface WordVectors
public boolean hasWord(String word)
WordVectors
hasWord
in interface WordVectors
word
- the word to test for
public Collection<String> wordsNearest(org.nd4j.linalg.api.ndarray.INDArray words, int top)
wordsNearest
in interface WordVectors
public Collection<String> wordsNearestSum(org.nd4j.linalg.api.ndarray.INDArray words, int top)
wordsNearestSum
in interface WordVectors
public Collection<String> wordsNearestSum(String word, int n)
WordVectors
wordsNearestSum
in interface WordVectors
word
- the word to compare
n
- the n to get
public Collection<String> wordsNearestSum(Collection<String> positive, Collection<String> negative, int top)
WordVectors
wordsNearestSum
in interface WordVectors
positive
- the positive words
negative
- the negative words
top
- the top n words
public Map<String,Double> accuracy(List<String> questions)
WordVectors
accuracy
in interface WordVectors
questions
- the questions to ask
public int indexOf(String word)
indexOf
in interface WordVectors
public List<String> similarWordsInVocabTo(String word, double accuracy)
WordVectors
similarWordsInVocabTo
in interface WordVectors
word
- the word to compare
accuracy
- the accuracy: 0 to 1
public Collection<String> wordsNearest(Collection<String> positive, Collection<String> negative, int top)
WordVectors
wordsNearest
in interface WordVectors
positive
- the positive words
negative
- the negative words
top
- the top n words
public Collection<String> wordsNearest(String word, int n)
WordVectors
wordsNearest
in interface WordVectors
word
- the word to compare
n
- the n to get
public double similarity(String word, String word2)
WordVectors
similarity
in interface WordVectors
word
- the first word
word2
- the second word
public WeightLookupTable lookupTable()
WordVectors
lookupTable
in interface WordVectors
public void setModelUtils(ModelUtils utils)
WordVectors
setModelUtils
in interface WordVectors
public void loadWeightsInto(org.nd4j.linalg.api.ndarray.INDArray array)
loadWeightsInto
in interface org.deeplearning4j.nn.weights.embeddings.EmbeddingInitializer
public int vectorSize()
vectorSize
in interface org.deeplearning4j.nn.weights.embeddings.EmbeddingInitializer
public boolean jsonSerializable()
jsonSerializable
in interface org.deeplearning4j.nn.weights.embeddings.EmbeddingInitializer
public double getLearningRate()
public int getDimension()
public int getContextWindowSize()
public int getEpoch()
public int getNegativesNumber()
public int getWordNgrams()
public String getLossName()
public String getModelName()
public int getNumberOfBuckets()
public String getLabelPrefix()
public boolean outOfVocabularySupported()
WordVectors
outOfVocabularySupported
in interface WordVectors
Copyright © 2022. All rights reserved.