public class JointStorage<T extends SequenceElement> extends Object implements WeightLookupTable<T>, VocabCache<T>
Modifier and Type | Class and Description |
---|---|
static class |
JointStorage.Builder<T extends SequenceElement> |
Constructor and Description |
---|
JointStorage() |
Modifier and Type | Method and Description |
---|---|
void |
addToken(SequenceElement word)
Adds a token
to the cache
|
void |
addWordToIndex(int index,
String word) |
boolean |
containsWord(String word)
Returns true if the cache contains the given word
|
int |
docAppearedIn(String word)
Count of documents a word appeared in
|
T |
elementAtIndex(int index)
Returns SequenceElement at the given index or null
|
int |
getVectorLength()
Returns outcome vector length
|
org.nd4j.linalg.api.ndarray.INDArray |
getWeights() |
boolean |
hasToken(String token)
Returns whether the cache
contains this token or not
|
void |
importVocabulary(VocabCache<T> vocabCache)
imports vocabulary
|
void |
incrementDocCount(String word,
int howMuch)
Increment the document count
|
void |
incrementTotalDocCount()
Increment the doc count
|
void |
incrementTotalDocCount(int by)
Increment the doc count
|
void |
incrementWordCount(String word)
Increment the count for the given word
|
void |
incrementWordCount(String word,
int increment)
Increment the count for the given word by
the amount increment
|
int |
indexOf(String word)
Returns the index of a given word
|
void |
iterate(T w1,
T w2)
Iterate on the given 2 vocab words
|
void |
iterateSample(T w1,
T w2,
AtomicLong nextRandom,
double alpha)
Iterate on the given 2 vocab words
|
int |
layerSize()
The layer size for the lookup table
|
org.nd4j.linalg.api.ndarray.INDArray |
loadCodes(int[] codes)
Loads the co-occurrences for the given codes
|
void |
loadVocab()
Load vocab
|
int |
numWords()
Returns number of words in all underlying vocabularies
|
void |
plotVocab()
Render the words via tsne
|
void |
plotVocab(Tsne tsne)
Render the words via TSNE
|
void |
putCode(int codeIndex,
org.nd4j.linalg.api.ndarray.INDArray code) |
void |
putVector(String word,
org.nd4j.linalg.api.ndarray.INDArray vector)
Inserts a word vector
|
void |
putVocabWord(String word)
Inserts the word as a vocab word
(it gets the vocab word from the internal token store).
|
void |
removeElement(String label)
Removes element with specified label from vocabulary
Please note: Huffman index should be updated after element removal
|
void |
removeElement(T element)
Removes specified element from vocabulary
Please note: Huffman index should be updated after element removal
|
void |
resetWeights()
Reset the weights of the cache
|
void |
resetWeights(boolean reset)
Clear out all weights regardless
|
void |
saveVocab()
Saves the vocab: this allows for reuse of word frequencies
|
void |
setCountForDoc(String word,
int count)
Set the count for the number of documents the word appears in
|
void |
setLearningRate(double lr)
Sets the learning rate
|
T |
tokenFor(String word)
Returns the token (again not necessarily in the vocab)
for this word
|
Collection<T> |
tokens()
All of the tokens in the cache (not necessarily a part of the vocab)
|
int |
totalNumberOfDocs()
Returns the total of number of documents encountered in the corpus
|
long |
totalWordOccurrences()
The total number of word occurrences
|
void |
updateWordsOccurencies()
Updates counters
|
org.nd4j.linalg.api.ndarray.INDArray |
vector(String word) |
Iterator<org.nd4j.linalg.api.ndarray.INDArray> |
vectors()
Iterates through all of the vectors in the cache
|
boolean |
vocabExists()
Vocab exists already
|
Collection<T> |
vocabWords()
Returns all SequenceElements in this JointStorage instance.
|
String |
wordAtIndex(int index)
Returns the word contained at the given index or null
|
T |
wordFor(String word) |
int |
wordFrequency(String word)
Returns the number of times the word has occurred
|
Collection<String> |
words()
Returns all of the words in the vocab
|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
getTableId, setTableId
protected Long tableId
public void loadVocab()
VocabCache
loadVocab
in interface VocabCache<T extends SequenceElement>
public boolean vocabExists()
VocabCache
vocabExists
in interface VocabCache<T extends SequenceElement>
public void saveVocab()
VocabCache
saveVocab
in interface VocabCache<T extends SequenceElement>
public Collection<String> words()
VocabCache
words
in interface VocabCache<T extends SequenceElement>
public void incrementWordCount(String word)
VocabCache
incrementWordCount
in interface VocabCache<T extends SequenceElement>
word
- the word to increment the count for
public void incrementWordCount(String word, int increment)
VocabCache
incrementWordCount
in interface VocabCache<T extends SequenceElement>
word
- the word to increment the count for
increment
- the amount to increment by
public int wordFrequency(String word)
VocabCache
wordFrequency
in interface VocabCache<T extends SequenceElement>
word
- the word to retrieve the occurrence frequency for
public boolean containsWord(String word)
VocabCache
containsWord
in interface VocabCache<T extends SequenceElement>
word
- the word to check for
public String wordAtIndex(int index)
VocabCache
wordAtIndex
in interface VocabCache<T extends SequenceElement>
index
- the index of the word to get
public T elementAtIndex(int index)
VocabCache
elementAtIndex
in interface VocabCache<T extends SequenceElement>
public int indexOf(String word)
VocabCache
indexOf
in interface VocabCache<T extends SequenceElement>
word
- the word to look up the index of
public Collection<T> vocabWords()
vocabWords
in interface VocabCache<T extends SequenceElement>
public long totalWordOccurrences()
VocabCache
totalWordOccurrences
in interface VocabCache<T extends SequenceElement>
public T wordFor(String word)
wordFor
in interface VocabCache<T extends SequenceElement>
public void addWordToIndex(int index, String word)
addWordToIndex
in interface VocabCache<T extends SequenceElement>
public void putVocabWord(String word)
VocabCache
putVocabWord
in interface VocabCache<T extends SequenceElement>
word
- the word to add to the vocab
public int numWords()
numWords
in interface VocabCache<T extends SequenceElement>
public int docAppearedIn(String word)
VocabCache
docAppearedIn
in interface VocabCache<T extends SequenceElement>
word
- the word to get the document count for
public void incrementDocCount(String word, int howMuch)
VocabCache
incrementDocCount
in interface VocabCache<T extends SequenceElement>
word
- the word to increment the document count for
public void setCountForDoc(String word, int count)
VocabCache
setCountForDoc
in interface VocabCache<T extends SequenceElement>
word
- the word to set the count for
count
- the count of the word
public int totalNumberOfDocs()
VocabCache
totalNumberOfDocs
in interface VocabCache<T extends SequenceElement>
public void incrementTotalDocCount()
VocabCache
incrementTotalDocCount
in interface VocabCache<T extends SequenceElement>
public void incrementTotalDocCount(int by)
VocabCache
incrementTotalDocCount
in interface VocabCache<T extends SequenceElement>
by
- the number to increment by
public Collection<T> tokens()
VocabCache
tokens
in interface VocabCache<T extends SequenceElement>
public void addToken(SequenceElement word)
VocabCache
addToken
in interface VocabCache<T extends SequenceElement>
word
- the word to add
public T tokenFor(String word)
VocabCache
tokenFor
in interface VocabCache<T extends SequenceElement>
word
- the word to get the token for
public boolean hasToken(String token)
VocabCache
hasToken
in interface VocabCache<T extends SequenceElement>
token
- the token to test
public void importVocabulary(VocabCache<T> vocabCache)
VocabCache
importVocabulary
in interface VocabCache<T extends SequenceElement>
public void updateWordsOccurencies()
VocabCache
updateWordsOccurencies
in interface VocabCache<T extends SequenceElement>
public void removeElement(String label)
VocabCache
removeElement
in interface VocabCache<T extends SequenceElement>
label
- label of the element to be removed
public void removeElement(T element)
VocabCache
removeElement
in interface VocabCache<T extends SequenceElement>
element
- SequenceElement to be removed
public int layerSize()
WeightLookupTable
layerSize
in interface WeightLookupTable<T extends SequenceElement>
public void resetWeights(boolean reset)
WeightLookupTable
resetWeights
in interface WeightLookupTable<T extends SequenceElement>
public void plotVocab(Tsne tsne)
WeightLookupTable
plotVocab
in interface WeightLookupTable<T extends SequenceElement>
tsne
- the tsne to use
public void plotVocab()
WeightLookupTable
plotVocab
in interface WeightLookupTable<T extends SequenceElement>
public void putCode(int codeIndex, org.nd4j.linalg.api.ndarray.INDArray code)
putCode
in interface WeightLookupTable<T extends SequenceElement>
public org.nd4j.linalg.api.ndarray.INDArray loadCodes(int[] codes)
WeightLookupTable
loadCodes
in interface WeightLookupTable<T extends SequenceElement>
codes
- the codes to load
public void iterate(T w1, T w2)
WeightLookupTable
iterate
in interface WeightLookupTable<T extends SequenceElement>
w1
- the first word to iterate on
w2
- the second word to iterate on
public void iterateSample(T w1, T w2, AtomicLong nextRandom, double alpha)
WeightLookupTable
iterateSample
in interface WeightLookupTable<T extends SequenceElement>
w1
- the first word to iterate on
w2
- the second word to iterate on
nextRandom
- nextRandom for sampling
alpha
- the alpha to use for learning
public void putVector(String word, org.nd4j.linalg.api.ndarray.INDArray vector)
WeightLookupTable
putVector
in interface WeightLookupTable<T extends SequenceElement>
word
- the word to insert
vector
- the vector to insert
public org.nd4j.linalg.api.ndarray.INDArray vector(String word)
vector
in interface WeightLookupTable<T extends SequenceElement>
public void resetWeights()
WeightLookupTable
resetWeights
in interface WeightLookupTable<T extends SequenceElement>
public void setLearningRate(double lr)
WeightLookupTable
setLearningRate
in interface WeightLookupTable<T extends SequenceElement>
public Iterator<org.nd4j.linalg.api.ndarray.INDArray> vectors()
WeightLookupTable
vectors
in interface WeightLookupTable<T extends SequenceElement>
public org.nd4j.linalg.api.ndarray.INDArray getWeights()
getWeights
in interface WeightLookupTable<T extends SequenceElement>
public int getVectorLength()
WeightLookupTable
getVectorLength
in interface WeightLookupTable<T extends SequenceElement>
Copyright © 2016. All Rights Reserved.