public class LuceneInvertedIndex extends Object implements InvertedIndex, org.apache.lucene.index.IndexReader.ReaderClosedListener, Iterator<List<VocabWord>>
Modifier and Type | Class and Description |
---|---|
class |
LuceneInvertedIndex.BatchDocIter |
static class |
LuceneInvertedIndex.Builder |
class |
LuceneInvertedIndex.DocIter |
Modifier and Type | Field and Description |
---|---|
static String |
DEFAULT_INDEX_DIR |
static String |
INDEX_PATH |
static String |
LABEL |
static String |
WORD_FIELD |
Constructor and Description |
---|
LuceneInvertedIndex(VocabCache vocabCache) |
LuceneInvertedIndex(VocabCache vocabCache,
boolean cache) |
LuceneInvertedIndex(VocabCache vocabCache,
boolean cache,
String indexPath) |
Modifier and Type | Method and Description |
---|---|
void |
addLabelForDoc(int doc,
String label)
Adds a label to the given document
|
void |
addLabelForDoc(int doc,
VocabWord word)
Add word to a document
|
void |
addLabelsForDoc(int doc,
Collection<String> label)
Adds labels to the given document
|
void |
addLabelsForDoc(int doc,
List<VocabWord> label)
Add word to a document
|
void |
addWordsToDoc(int doc,
List<VocabWord> words)
Adds words to the given document
|
void |
addWordsToDoc(int doc,
List<VocabWord> words,
Collection<String> label)
Adds words to the given document
|
void |
addWordsToDoc(int doc,
List<VocabWord> words,
String label)
Adds words to the given document
|
void |
addWordsToDoc(int doc,
List<VocabWord> words,
VocabWord label)
Adds words to the given document
|
void |
addWordsToDocVocabWord(int doc,
List<VocabWord> words,
Collection<VocabWord> label)
Adds words to the given document
|
void |
addWordToDoc(int doc,
VocabWord word)
Add word to a document
|
int[] |
allDocs()
Returns a list of all documents
|
Iterator<List<List<VocabWord>>> |
batchIter(int batchSize)
Iterate over batches
|
int |
batchSize()
For word vectors, this is the batch size for which to train on
|
void |
cleanup()
Cleanup any resources used
|
Iterator<List<VocabWord>> |
docs()
Iterate over documents
|
List<VocabWord> |
document(int index)
Returns a list of words for a document
|
int[] |
documents(VocabWord vocabWord)
Returns the list of documents a vocab word is in
|
Pair<List<VocabWord>,String> |
documentWithLabel(int index)
Returns a list of words for a document
and the associated label
|
Pair<List<VocabWord>,Collection<String>> |
documentWithLabels(int index)
Returns a list of words associated with the document
and the associated labels
|
void |
eachDoc(com.google.common.base.Function<List<VocabWord>,Void> func,
ExecutorService exec)
Iterate over each document
|
void |
eachDocWithLabel(com.google.common.base.Function<Pair<List<VocabWord>,String>,Void> func,
ExecutorService exec)
Iterate over each document with a label
|
void |
eachDocWithLabels(com.google.common.base.Function<Pair<List<VocabWord>,Collection<String>>,Void> func,
ExecutorService exec)
Iterate over each document with its labels
|
void |
finish()
Finishes saving data
|
boolean |
hasNext() |
Iterator<List<VocabWord>> |
miniBatches()
Iterates over mini batches
|
List<VocabWord> |
next() |
int |
numDocuments()
Returns the number of documents
|
void |
onClose(org.apache.lucene.index.IndexReader reader) |
void |
remove() |
double |
sample()
Sampling for creating mini batches
|
int |
totalWords()
Total number of words in the index
|
void |
unlock()
Unlock the index
|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
forEachRemaining
public static final String WORD_FIELD
public static final String LABEL
public static final String INDEX_PATH
public static final String DEFAULT_INDEX_DIR
public LuceneInvertedIndex(VocabCache vocabCache, boolean cache)
public LuceneInvertedIndex(VocabCache vocabCache, boolean cache, String indexPath)
public LuceneInvertedIndex(VocabCache vocabCache)
public Iterator<List<List<VocabWord>>> batchIter(int batchSize)
InvertedIndex
batchIter
in interface InvertedIndex
public Iterator<List<VocabWord>> docs()
InvertedIndex
docs
in interface InvertedIndex
public void unlock()
InvertedIndex
unlock
in interface InvertedIndex
public void cleanup()
InvertedIndex
cleanup
in interface InvertedIndex
public double sample()
InvertedIndex
sample
in interface InvertedIndex
public Iterator<List<VocabWord>> miniBatches()
InvertedIndex
miniBatches
in interface InvertedIndex
public List<VocabWord> document(int index)
InvertedIndex
document
in interface InvertedIndex
public int[] documents(VocabWord vocabWord)
InvertedIndex
documents
in interface InvertedIndex
vocabWord
- the vocab word to get documents for
public int numDocuments()
InvertedIndex
numDocuments
in interface InvertedIndex
public int[] allDocs()
InvertedIndex
allDocs
in interface InvertedIndex
public void addWordToDoc(int doc, VocabWord word)
InvertedIndex
addWordToDoc
in interface InvertedIndex
doc
- the document to add to
word
- the word to add
public void addWordsToDoc(int doc, List<VocabWord> words)
InvertedIndex
addWordsToDoc
in interface InvertedIndex
doc
- the document to add to
words
- the words to add
public Pair<List<VocabWord>,String> documentWithLabel(int index)
InvertedIndex
documentWithLabel
in interface InvertedIndex
public Pair<List<VocabWord>,Collection<String>> documentWithLabels(int index)
InvertedIndex
documentWithLabels
in interface InvertedIndex
public void addLabelForDoc(int doc, VocabWord word)
InvertedIndex
addLabelForDoc
in interface InvertedIndex
doc
- the document to add to
word
- the word to add
public void addLabelForDoc(int doc, String label)
InvertedIndex
addLabelForDoc
in interface InvertedIndex
doc
- the document to add to
public void addWordsToDoc(int doc, List<VocabWord> words, String label)
InvertedIndex
addWordsToDoc
in interface InvertedIndex
doc
- the document to add to
words
- the words to add
label
- the label for the document
public void addWordsToDoc(int doc, List<VocabWord> words, VocabWord label)
InvertedIndex
addWordsToDoc
in interface InvertedIndex
doc
- the document to add to
words
- the words to add
label
- the label for the document
public void addLabelsForDoc(int doc, List<VocabWord> label)
InvertedIndex
addLabelsForDoc
in interface InvertedIndex
doc
- the document to add to
label
- the word to add
public void addLabelsForDoc(int doc, Collection<String> label)
InvertedIndex
addLabelsForDoc
in interface InvertedIndex
doc
- the document to add to
label
- the labels to add
public void addWordsToDoc(int doc, List<VocabWord> words, Collection<String> label)
InvertedIndex
addWordsToDoc
in interface InvertedIndex
doc
- the document to add to
words
- the words to add
label
- the label for the document
public void addWordsToDocVocabWord(int doc, List<VocabWord> words, Collection<VocabWord> label)
InvertedIndex
addWordsToDocVocabWord
in interface InvertedIndex
doc
- the document to add to
words
- the words to add
label
- the label for the document
public void finish()
InvertedIndex
finish
in interface InvertedIndex
public int totalWords()
InvertedIndex
totalWords
in interface InvertedIndex
public int batchSize()
InvertedIndex
batchSize
in interface InvertedIndex
public void eachDocWithLabels(com.google.common.base.Function<Pair<List<VocabWord>,Collection<String>>,Void> func, ExecutorService exec)
InvertedIndex
eachDocWithLabels
in interface InvertedIndex
func
- the function to apply
exec
- executor service for execution
public void eachDocWithLabel(com.google.common.base.Function<Pair<List<VocabWord>,String>,Void> func, ExecutorService exec)
InvertedIndex
eachDocWithLabel
in interface InvertedIndex
func
- the function to apply
exec
- executor service for execution
public void eachDoc(com.google.common.base.Function<List<VocabWord>,Void> func, ExecutorService exec)
InvertedIndex
eachDoc
in interface InvertedIndex
func
- the function to apply
exec
- executor service for execution
public void onClose(org.apache.lucene.index.IndexReader reader)
onClose
in interface org.apache.lucene.index.IndexReader.ReaderClosedListener
Copyright © 2015. All rights reserved.