public class LuceneInvertedIndex<T extends SequenceElement> extends Object implements InvertedIndex<T>, org.apache.lucene.index.IndexReader.ReaderClosedListener, Iterator<List<T>>
Modifier and Type | Class and Description |
---|---|
class |
LuceneInvertedIndex.BatchDocIter |
static class |
LuceneInvertedIndex.Builder<T extends SequenceElement> |
class |
LuceneInvertedIndex.DocIter |
Modifier and Type | Field and Description |
---|---|
static String |
DEFAULT_INDEX_DIR |
static String |
INDEX_PATH |
static String |
LABEL |
static String |
WORD_FIELD |
Constructor and Description |
---|
LuceneInvertedIndex(VocabCache vocabCache) |
LuceneInvertedIndex(VocabCache vocabCache,
boolean cache) |
LuceneInvertedIndex(VocabCache vocabCache,
boolean cache,
String indexPath) |
Modifier and Type | Method and Description |
---|---|
void |
addLabelForDoc(int doc,
String label)
Adds a label to the given document
|
void |
addLabelForDoc(int doc,
T word)
Add word to a document
|
void |
addLabelsForDoc(int doc,
Collection<String> label)
Adds labels to the given document
|
void |
addLabelsForDoc(int doc,
List<T> label)
Add word to a document
|
void |
addWordsToDoc(int doc,
List<T> words)
Adds words to the given document
|
void |
addWordsToDoc(int doc,
List<T> words,
Collection<String> label)
Adds words to the given document
|
void |
addWordsToDoc(int doc,
List<T> words,
String label)
Adds words to the given document
|
void |
addWordsToDoc(int doc,
List<T> words,
T label)
Adds words to the given document
|
void |
addWordsToDocVocabWord(int doc,
List<T> words,
Collection<T> label)
Adds words to the given document
|
void |
addWordToDoc(int doc,
T word)
Add word to a document
|
int[] |
allDocs()
Returns a list of all documents
|
Iterator<List<List<T>>> |
batchIter(int batchSize)
Iterate over batches
|
int |
batchSize()
For word vectors, this is the batch size for which to train on
|
void |
cleanup()
Cleanup any resources used
|
Iterator<List<T>> |
docs()
Iterate over documents
|
List<T> |
document(int index)
Returns a list of words for a document
|
int[] |
documents(T vocabWord)
Returns the list of documents a vocab word is in
|
Pair<List<T>,String> |
documentWithLabel(int index)
Returns a list of words for a document
and the associated label
|
Pair<List<T>,Collection<String>> |
documentWithLabels(int index)
Returns a list of words associated with the document
and the associated labels
|
void |
eachDoc(com.google.common.base.Function<List<T>,Void> func,
ExecutorService exec)
Iterate over each document
|
void |
eachDocWithLabel(com.google.common.base.Function<Pair<List<T>,String>,Void> func,
ExecutorService exec)
Iterate over each document with a label
|
void |
eachDocWithLabels(com.google.common.base.Function<Pair<List<T>,Collection<String>>,Void> func,
ExecutorService exec)
Iterate over each document with a label
|
void |
finish()
Finishes saving data
|
boolean |
hasNext() |
Iterator<List<T>> |
miniBatches()
Iterates over mini batches
|
List<T> |
next() |
int |
numDocuments()
Returns the number of documents
|
void |
onClose(org.apache.lucene.index.IndexReader reader) |
void |
remove() |
double |
sample()
Sampling for creating mini batches
|
long |
totalWords()
Total number of words in the index
|
void |
unlock()
Unlock the index
|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
forEachRemaining
public static final String WORD_FIELD
public static final String LABEL
public static final String INDEX_PATH
public static final String DEFAULT_INDEX_DIR
public LuceneInvertedIndex(VocabCache vocabCache, boolean cache)
public LuceneInvertedIndex(VocabCache vocabCache, boolean cache, String indexPath)
public LuceneInvertedIndex(VocabCache vocabCache)
public Iterator<List<List<T>>> batchIter(int batchSize)
InvertedIndex
batchIter
in interface InvertedIndex<T extends SequenceElement>
public Iterator<List<T>> docs()
InvertedIndex
docs
in interface InvertedIndex<T extends SequenceElement>
public void unlock()
InvertedIndex
unlock
in interface InvertedIndex<T extends SequenceElement>
public void cleanup()
InvertedIndex
cleanup
in interface InvertedIndex<T extends SequenceElement>
public double sample()
InvertedIndex
sample
in interface InvertedIndex<T extends SequenceElement>
public Iterator<List<T>> miniBatches()
InvertedIndex
miniBatches
in interface InvertedIndex<T extends SequenceElement>
public List<T> document(int index)
InvertedIndex
document
in interface InvertedIndex<T extends SequenceElement>
public int[] documents(T vocabWord)
InvertedIndex
documents
in interface InvertedIndex<T extends SequenceElement>
vocabWord
- the vocab word to get documents for
public int numDocuments()
InvertedIndex
numDocuments
in interface InvertedIndex<T extends SequenceElement>
public int[] allDocs()
InvertedIndex
allDocs
in interface InvertedIndex<T extends SequenceElement>
public void addWordToDoc(int doc, T word)
InvertedIndex
addWordToDoc
in interface InvertedIndex<T extends SequenceElement>
doc
- the document to add to
word
- the word to add
public void addWordsToDoc(int doc, List<T> words)
InvertedIndex
addWordsToDoc
in interface InvertedIndex<T extends SequenceElement>
doc
- the document to add to
words
- the words to add
public Pair<List<T>,String> documentWithLabel(int index)
InvertedIndex
documentWithLabel
in interface InvertedIndex<T extends SequenceElement>
public Pair<List<T>,Collection<String>> documentWithLabels(int index)
InvertedIndex
documentWithLabels
in interface InvertedIndex<T extends SequenceElement>
public void addLabelForDoc(int doc, T word)
InvertedIndex
addLabelForDoc
in interface InvertedIndex<T extends SequenceElement>
doc
- the document to add to
word
- the word to add
public void addLabelForDoc(int doc, String label)
InvertedIndex
addLabelForDoc
in interface InvertedIndex<T extends SequenceElement>
doc
- the document to add to
public void addWordsToDoc(int doc, List<T> words, String label)
InvertedIndex
addWordsToDoc
in interface InvertedIndex<T extends SequenceElement>
doc
- the document to add to
words
- the words to add
label
- the label for the document
public void addWordsToDoc(int doc, List<T> words, T label)
InvertedIndex
addWordsToDoc
in interface InvertedIndex<T extends SequenceElement>
doc
- the document to add to
words
- the words to add
label
- the label for the document
public void addLabelsForDoc(int doc, List<T> label)
InvertedIndex
addLabelsForDoc
in interface InvertedIndex<T extends SequenceElement>
doc
- the document to add to
label
- the word to add
public void addLabelsForDoc(int doc, Collection<String> label)
InvertedIndex
addLabelsForDoc
in interface InvertedIndex<T extends SequenceElement>
doc
- the document to add to
label
- the labels to add
public void addWordsToDoc(int doc, List<T> words, Collection<String> label)
InvertedIndex
addWordsToDoc
in interface InvertedIndex<T extends SequenceElement>
doc
- the document to add to
words
- the words to add
label
- the label for the document
public void addWordsToDocVocabWord(int doc, List<T> words, Collection<T> label)
InvertedIndex
addWordsToDocVocabWord
in interface InvertedIndex<T extends SequenceElement>
doc
- the document to add to
words
- the words to add
label
- the label for the document
public void finish()
InvertedIndex
finish
in interface InvertedIndex<T extends SequenceElement>
public long totalWords()
InvertedIndex
totalWords
in interface InvertedIndex<T extends SequenceElement>
public int batchSize()
InvertedIndex
batchSize
in interface InvertedIndex<T extends SequenceElement>
public void eachDocWithLabels(com.google.common.base.Function<Pair<List<T>,Collection<String>>,Void> func, ExecutorService exec)
InvertedIndex
eachDocWithLabels
in interface InvertedIndex<T extends SequenceElement>
func
- the function to apply
exec
- executor service for execution
public void eachDocWithLabel(com.google.common.base.Function<Pair<List<T>,String>,Void> func, ExecutorService exec)
InvertedIndex
eachDocWithLabel
in interface InvertedIndex<T extends SequenceElement>
func
- the function to apply
exec
- executor service for execution
public void eachDoc(com.google.common.base.Function<List<T>,Void> func, ExecutorService exec)
InvertedIndex
eachDoc
in interface InvertedIndex<T extends SequenceElement>
func
- the function to apply
exec
- executor service for execution
public void onClose(org.apache.lucene.index.IndexReader reader)
onClose
in interface org.apache.lucene.index.IndexReader.ReaderClosedListener
public boolean hasNext()
hasNext
in interface Iterator<List<T extends SequenceElement>>
public List<T> next()
next
in interface Iterator<List<T extends SequenceElement>>
public void remove()
remove
in interface Iterator<List<T extends SequenceElement>>
Copyright © 2016. All Rights Reserved.