public class ParagraphVectors extends Word2Vec
Modifier and Type | Class and Description |
---|---|
class |
ParagraphVectors.BlindInferenceCallable |
static class |
ParagraphVectors.Builder |
class |
ParagraphVectors.InferenceCallable |
SequenceVectors.AsyncSequencer
Modifier and Type | Field and Description |
---|---|
protected AtomicLong |
countFinished |
protected AtomicLong |
countSubmitted |
protected org.threadly.concurrent.PriorityScheduler |
inferenceExecutor |
protected Object |
inferenceLocker |
protected LabelAwareIterator |
labelAwareIterator |
protected List<VocabWord> |
labelsList |
protected org.nd4j.linalg.api.ndarray.INDArray |
labelsMatrix |
protected LabelsSource |
labelsSource |
protected boolean |
normalizedLabels |
sentenceIter, tokenizerFactory
configuration, configured, elementsLearningAlgorithm, enableScavenger, eventListeners, existingModel, intersectModel, iterator, lockFactor, log, scoreElements, scoreSequences, sequenceLearningAlgorithm, unknownElement, vocabLimit
batchSize, DEFAULT_UNK, layerSize, learningRate, learningRateDecayWords, lookupTable, minLearningRate, minWordFrequency, modelUtils, negative, numEpochs, numIterations, resetModel, sampling, seed, stopWords, trainElementsVectors, trainSequenceVectors, useAdeGrad, useUnknown, variableWindows, vocab, window, workers
Modifier | Constructor and Description |
---|---|
protected |
ParagraphVectors() |
Modifier and Type | Method and Description |
---|---|
void |
extractLabels() |
void |
fit()
Starts training over
|
static ParagraphVectors |
fromJson(String jsonString) |
org.nd4j.linalg.api.ndarray.INDArray |
inferVector(LabelledDocument document)
This method calculates inferred vector for given document, with default parameters for learning rate and iterations
|
org.nd4j.linalg.api.ndarray.INDArray |
inferVector(LabelledDocument document,
double learningRate,
double minLearningRate,
int iterations)
This method calculates inferred vector for given document
|
org.nd4j.linalg.api.ndarray.INDArray |
inferVector(@NonNull List<VocabWord> document)
This method calculates inferred vector for given list of words, with default parameters for learning rate and iterations
|
org.nd4j.linalg.api.ndarray.INDArray |
inferVector(@NonNull List<VocabWord> document,
double learningRate,
double minLearningRate,
int iterations)
This method calculates inferred vector for given document
|
org.nd4j.linalg.api.ndarray.INDArray |
inferVector(String text)
This method calculates inferred vector for given text, with default parameters for learning rate and iterations
|
org.nd4j.linalg.api.ndarray.INDArray |
inferVector(String text,
double learningRate,
double minLearningRate,
int iterations)
This method calculates inferred vector for given text
|
Future<org.nd4j.common.primitives.Pair<String,org.nd4j.linalg.api.ndarray.INDArray>> |
inferVectorBatched(@NonNull LabelledDocument document)
This method implements batched inference, based on Java Future parallelism model.
|
List<org.nd4j.linalg.api.ndarray.INDArray> |
inferVectorBatched(@NonNull List<String> documents)
This method does inference on a given List<String>
|
Future<org.nd4j.linalg.api.ndarray.INDArray> |
inferVectorBatched(@NonNull String document)
This method implements batched inference, based on Java Future parallelism model.
|
protected void |
initInference() |
Collection<String> |
nearestLabels(@NonNull Collection<VocabWord> document,
int topN)
This method returns top N labels nearest to specified set of vocab words
|
Collection<String> |
nearestLabels(org.nd4j.linalg.api.ndarray.INDArray labelVector,
int topN)
This method returns top N labels nearest to specified features vector
|
Collection<String> |
nearestLabels(LabelledDocument document,
int topN)
This method returns top N labels nearest to specified document
|
Collection<String> |
nearestLabels(@NonNull String rawText,
int topN)
This method returns top N labels nearest to specified text
|
String |
predict(LabelledDocument document)
This method predicts label of the document.
|
String |
predict(List<VocabWord> document)
This method predicts label of the document.
|
String |
predict(String rawText)
Deprecated.
|
Collection<String> |
predictSeveral(@NonNull LabelledDocument document,
int limit)
Predict several labels based on the document.
|
Collection<String> |
predictSeveral(List<VocabWord> document,
int limit)
Predict several labels based on the document.
|
Collection<String> |
predictSeveral(String rawText,
int limit)
Predict several labels based on the document.
|
protected void |
reassignExistingModel() |
void |
setSequenceIterator(@NonNull SequenceIterator<VocabWord> iterator)
This method defines SequenceIterator instance, that will be used as training corpus source.
|
double |
similarityToLabel(LabelledDocument document,
String label)
This method returns similarity of the document to specific label, based on mean value
|
double |
similarityToLabel(List<VocabWord> document,
String label)
This method returns similarity of the document to specific label, based on mean value
|
double |
similarityToLabel(String rawText,
String label)
Deprecated.
|
String |
toJson() |
setSentenceIterator, setTokenizerFactory
buildVocab, getElementsScore, getSequencesScore, getUNK, getWordVectorMatrix, initLearners, setUNK, trainSequence
accuracy, getLayerSize, getWordVector, getWordVectorMatrixNormalized, getWordVectors, getWordVectorsMean, hasWord, indexOf, jsonSerializable, loadWeightsInto, lookupTable, outOfVocabularySupported, setLookupTable, setModelUtils, setVocab, similarity, similarWordsInVocabTo, update, update, vectorSize, vocab, vocabSize, wordsNearest, wordsNearest, wordsNearest, wordsNearestSum, wordsNearestSum, wordsNearestSum
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
accuracy, getWordVector, getWordVectorMatrixNormalized, getWordVectors, getWordVectorsMean, hasWord, indexOf, lookupTable, outOfVocabularySupported, setModelUtils, similarity, similarWordsInVocabTo, vocab, wordsNearest, wordsNearest, wordsNearest, wordsNearestSum, wordsNearestSum, wordsNearestSum
jsonSerializable, loadWeightsInto, vectorSize, vocabSize
protected LabelsSource labelsSource
protected transient LabelAwareIterator labelAwareIterator
protected org.nd4j.linalg.api.ndarray.INDArray labelsMatrix
protected boolean normalizedLabels
protected final transient Object inferenceLocker
protected transient org.threadly.concurrent.PriorityScheduler inferenceExecutor
protected transient AtomicLong countSubmitted
protected transient AtomicLong countFinished
protected void initInference()
@Deprecated public String predict(String rawText)
rawText
- public void setSequenceIterator(@NonNull SequenceIterator<VocabWord> iterator)
setSequenceIterator
in class Word2Vec
iterator
- public String predict(LabelledDocument document)
document
- the document
public void extractLabels()
public org.nd4j.linalg.api.ndarray.INDArray inferVector(String text, double learningRate, double minLearningRate, int iterations)
text
- protected void reassignExistingModel()
public org.nd4j.linalg.api.ndarray.INDArray inferVector(LabelledDocument document, double learningRate, double minLearningRate, int iterations)
document
- public org.nd4j.linalg.api.ndarray.INDArray inferVector(@NonNull List<VocabWord> document, double learningRate, double minLearningRate, int iterations)
document
- public org.nd4j.linalg.api.ndarray.INDArray inferVector(String text)
text
- public org.nd4j.linalg.api.ndarray.INDArray inferVector(LabelledDocument document)
document
- public org.nd4j.linalg.api.ndarray.INDArray inferVector(@NonNull List<VocabWord> document)
document
- public Future<org.nd4j.common.primitives.Pair<String,org.nd4j.linalg.api.ndarray.INDArray>> inferVectorBatched(@NonNull LabelledDocument document)
document
- public Future<org.nd4j.linalg.api.ndarray.INDArray> inferVectorBatched(@NonNull String document)
document
- public List<org.nd4j.linalg.api.ndarray.INDArray> inferVectorBatched(@NonNull List<String> documents)
documents
- public String predict(List<VocabWord> document)
document
- the document
public Collection<String> predictSeveral(@NonNull LabelledDocument document, int limit)
document
- raw text of the document
public Collection<String> predictSeveral(String rawText, int limit)
rawText
- raw text of the document
public Collection<String> predictSeveral(List<VocabWord> document, int limit)
document
- the document
public Collection<String> nearestLabels(LabelledDocument document, int topN)
document
- topN
- public Collection<String> nearestLabels(@NonNull String rawText, int topN)
rawText
- topN
- public Collection<String> nearestLabels(@NonNull Collection<VocabWord> document, int topN)
document
- topN
- public Collection<String> nearestLabels(org.nd4j.linalg.api.ndarray.INDArray labelVector, int topN)
labelVector
- topN
- @Deprecated public double similarityToLabel(String rawText, String label)
rawText
- label
- public void fit()
SequenceVectors
fit
in class SequenceVectors<VocabWord>
public double similarityToLabel(LabelledDocument document, String label)
document
- label
- public double similarityToLabel(List<VocabWord> document, String label)
document
- label
- public String toJson() throws org.nd4j.shade.jackson.core.JsonProcessingException
public static ParagraphVectors fromJson(String jsonString) throws IOException
IOException
Copyright © 2022. All rights reserved.