Class WordVectorsImpl<T extends SequenceElement>

    • Field Detail

      • minWordFrequency

        protected int minWordFrequency
      • layerSize

        protected int layerSize
      • numIterations

        protected int numIterations
      • numEpochs

        protected int numEpochs
      • negative

        protected double negative
      • sampling

        protected double sampling
      • learningRate

        protected org.nd4j.shade.guava.util.concurrent.AtomicDouble learningRate
      • minLearningRate

        protected double minLearningRate
      • window

        protected int window
      • batchSize

        protected int batchSize
      • learningRateDecayWords

        protected int learningRateDecayWords
      • resetModel

        protected boolean resetModel
      • useAdeGrad

        protected boolean useAdeGrad
      • workers

        protected int workers
      • trainSequenceVectors

        protected boolean trainSequenceVectors
      • trainElementsVectors

        protected boolean trainElementsVectors
      • seed

        protected long seed
      • useUnknown

        protected boolean useUnknown
      • variableWindows

        protected int[] variableWindows
    • Constructor Detail

      • WordVectorsImpl

        public WordVectorsImpl()
    • Method Detail

      • getLayerSize

        public int getLayerSize()
        This method returns the word vector size
        Returns:
        the word vector size
      • hasWord

        public boolean hasWord​(String word)
        Returns true if the model has this word in the vocab
        Specified by:
        hasWord in interface WordVectors
        Parameters:
        word - the word to test for
        Returns:
        true if the model has the word in the vocab
      • wordsNearestSum

        public Collection<String> wordsNearestSum​(Collection<String> positive,
                                                  Collection<String> negative,
                                                  int top)
        Words nearest based on positive and negative words
        Specified by:
        wordsNearestSum in interface WordVectors
        Parameters:
        positive - the positive words
        negative - the negative words
        top - the top n words
        Returns:
        the words nearest the mean of the words
      • wordsNearestSum

        public Collection<String> wordsNearestSum​(org.nd4j.linalg.api.ndarray.INDArray words,
                                                  int top)
        Words nearest based on positive and negative words
        Specified by:
        wordsNearestSum in interface WordVectors
        Parameters:
        top - the top n words
        Returns:
        the words nearest the mean of the words
      • wordsNearest

        public Collection<String> wordsNearest​(org.nd4j.linalg.api.ndarray.INDArray words,
                                               int top)
        Words nearest based on positive and negative words
        Specified by:
        wordsNearest in interface WordVectors
        Parameters:
        top - the top n words
        Returns:
        the words nearest the mean of the words
      • wordsNearestSum

        public Collection<String> wordsNearestSum​(String word,
                                                  int n)
        Get the top n words most similar to the given word
        Specified by:
        wordsNearestSum in interface WordVectors
        Parameters:
        word - the word to compare
        n - the n to get
        Returns:
        the top n words
      • accuracy

        public Map<String,​Double> accuracy​(List<String> questions)
        Accuracy based on questions which are a space separated list of strings where the first word is the query word, the next 2 words are negative, and the last word is the predicted word to be nearest
        Specified by:
        accuracy in interface WordVectors
        Parameters:
        questions - the questions to ask
        Returns:
        the accuracy based on these questions
      • similarWordsInVocabTo

        public List<String> similarWordsInVocabTo​(String word,
                                                  double accuracy)
        Find all words with similar characters in the vocab
        Specified by:
        similarWordsInVocabTo in interface WordVectors
        Parameters:
        word - the word to compare
        accuracy - the accuracy: 0 to 1
        Returns:
        the list of words that are similar in the vocab
      • getWordVector

        public double[] getWordVector​(String word)
        Get the word vector for a given word
        Specified by:
        getWordVector in interface WordVectors
        Parameters:
        word - the word to get the matrix for
        Returns:
        the vector for this word, as a double array
      • getWordVectorMatrixNormalized

        public org.nd4j.linalg.api.ndarray.INDArray getWordVectorMatrixNormalized​(String word)
        Returns the word vector divided by the norm2 of the array
        Specified by:
        getWordVectorMatrixNormalized in interface WordVectors
        Parameters:
        word - the word to get the matrix for
        Returns:
        the looked up matrix
      • getWordVectorMatrix

        public org.nd4j.linalg.api.ndarray.INDArray getWordVectorMatrix​(String word)
        Description copied from interface: WordVectors
        Get the word vector for a given word
        Specified by:
        getWordVectorMatrix in interface WordVectors
        Parameters:
        word - the word to get the matrix for
        Returns:
        the ndarray for this word
      • wordsNearest

        public Collection<String> wordsNearest​(Collection<String> positive,
                                               Collection<String> negative,
                                               int top)
        Words nearest based on positive and negative words
        Specified by:
        wordsNearest in interface WordVectors
        Parameters:
        positive - the positive words
        negative - the negative words
        top - the top n words
        Returns:
        the words nearest the mean of the words
      • getWordVectors

        public org.nd4j.linalg.api.ndarray.INDArray getWordVectors​(@NonNull Collection<String> labels)
        This method returns a 2D array, where each row represents the corresponding label
        Specified by:
        getWordVectors in interface WordVectors
        Parameters:
        labels - the labels to look up
        Returns:
        a 2D array in which each row represents the corresponding label
      • getWordVectorsMean

        public org.nd4j.linalg.api.ndarray.INDArray getWordVectorsMean​(Collection<String> labels)
        This method returns the mean vector, built from the words/labels passed in
        Specified by:
        getWordVectorsMean in interface WordVectors
        Parameters:
        labels - the words/labels to build the mean vector from
        Returns:
        the mean vector of the given words/labels
      • wordsNearest

        public Collection<String> wordsNearest​(String word,
                                               int n)
        Get the top n words most similar to the given word
        Specified by:
        wordsNearest in interface WordVectors
        Parameters:
        word - the word to compare
        n - the n to get
        Returns:
        the top n words
      • similarity

        public double similarity​(String word,
                                 String word2)
        Returns similarity of two elements, provided by ModelUtils
        Specified by:
        similarity in interface WordVectors
        Parameters:
        word - the first word
        word2 - the second word
        Returns:
        a normalized similarity (cosine similarity)
      • setModelUtils

        public void setModelUtils​(@NonNull ModelUtils modelUtils)
        Description copied from interface: WordVectors
        Specifies ModelUtils to be used to access model
        Specified by:
        setModelUtils in interface WordVectors
      • setLookupTable

        public void setLookupTable​(@NonNull WeightLookupTable lookupTable)
      • setVocab

        public void setVocab​(VocabCache vocab)
      • update

        protected void update()
      • update

        protected void update​(org.nd4j.linalg.heartbeat.reports.Environment env,
                              org.nd4j.linalg.heartbeat.reports.Event event)
      • loadWeightsInto

        public void loadWeightsInto​(org.nd4j.linalg.api.ndarray.INDArray array)
        Specified by:
        loadWeightsInto in interface org.deeplearning4j.nn.weights.embeddings.EmbeddingInitializer
      • vocabSize

        public long vocabSize()
        Specified by:
        vocabSize in interface org.deeplearning4j.nn.weights.embeddings.EmbeddingInitializer
      • vectorSize

        public int vectorSize()
        Specified by:
        vectorSize in interface org.deeplearning4j.nn.weights.embeddings.EmbeddingInitializer
      • jsonSerializable

        public boolean jsonSerializable()
        Specified by:
        jsonSerializable in interface org.deeplearning4j.nn.weights.embeddings.EmbeddingInitializer
      • outOfVocabularySupported

        public boolean outOfVocabularySupported()
        Description copied from interface: WordVectors
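        Indicates whether the implementation vectorizes words that are absent from the vocabulary
        Specified by:
        outOfVocabularySupported in interface WordVectors
        Returns:
        true if the implementation vectorizes words absent from the vocabulary, false otherwise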