public class LuceneInvertedIndex extends Object implements InvertedIndex, org.apache.lucene.index.IndexReader.ReaderClosedListener, Iterator<List<VocabWord>>
Modifier and Type | Class and Description |
---|---|
static class |
LuceneInvertedIndex.Builder |
Modifier and Type | Field and Description |
---|---|
static String |
DEFAULT_INDEX_DIR |
static String |
INDEX_PATH |
static String |
WORD_FIELD |
Constructor and Description |
---|
LuceneInvertedIndex(VocabCache vocabCache,
boolean cache) |
LuceneInvertedIndex(VocabCache vocabCache,
boolean cache,
String indexPath) |
Modifier and Type | Method and Description |
---|---|
void |
addWordsToDoc(int doc,
List<VocabWord> words)
Adds words to the given document
|
void |
addWordToDoc(int doc,
VocabWord word)
Add word to a document
|
int[] |
allDocs()
Returns a list of all documents
|
int |
batchSize()
For word vectors, this is the batch size to train on
|
List<VocabWord> |
document(int index)
Returns a list of words for a document
|
int[] |
documents(VocabWord vocabWord)
Returns the list of documents a vocab word is in
|
void |
eachDoc(com.google.common.base.Function<List<VocabWord>,Void> func,
ExecutorService exec)
Iterate over each document
|
void |
finish()
Finishes saving data
|
boolean |
hasNext() |
Iterator<List<VocabWord>> |
miniBatches()
Iterates over mini batches
|
List<VocabWord> |
next() |
int |
numDocuments()
Returns the number of documents
|
void |
onClose(org.apache.lucene.index.IndexReader reader) |
void |
remove() |
double |
sample()
Sampling for creating mini batches
|
int |
totalWords()
Total number of words in the index
|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
forEachRemaining
public static final String WORD_FIELD
public static final String INDEX_PATH
public static final String DEFAULT_INDEX_DIR
public LuceneInvertedIndex(VocabCache vocabCache, boolean cache)
public LuceneInvertedIndex(VocabCache vocabCache, boolean cache, String indexPath)
public double sample()
InvertedIndex
sample
in interface InvertedIndex
public Iterator<List<VocabWord>> miniBatches()
InvertedIndex
miniBatches
in interface InvertedIndex
public List<VocabWord> document(int index)
InvertedIndex
document
in interface InvertedIndex
public int[] documents(VocabWord vocabWord)
InvertedIndex
documents
in interface InvertedIndex
vocabWord
- the vocab word to get documents for
public int numDocuments()
InvertedIndex
numDocuments
in interface InvertedIndex
public int[] allDocs()
InvertedIndex
allDocs
in interface InvertedIndex
public void addWordToDoc(int doc, VocabWord word)
InvertedIndex
addWordToDoc
in interface InvertedIndex
doc
- the document to add to
word
- the word to add
public void addWordsToDoc(int doc, List<VocabWord> words)
InvertedIndex
addWordsToDoc
in interface InvertedIndex
doc
- the document to add to
words
- the words to add
public void finish()
InvertedIndex
finish
in interface InvertedIndex
public int totalWords()
InvertedIndex
totalWords
in interface InvertedIndex
public int batchSize()
InvertedIndex
batchSize
in interface InvertedIndex
public void eachDoc(com.google.common.base.Function<List<VocabWord>,Void> func, ExecutorService exec)
InvertedIndex
eachDoc
in interface InvertedIndex
func
- the function to apply
exec
- executor service for execution
public void onClose(org.apache.lucene.index.IndexReader reader)
onClose
in interface org.apache.lucene.index.IndexReader.ReaderClosedListener
Copyright © 2014. All rights reserved.