public class LuceneInvertedIndex extends Object implements InvertedIndex, org.apache.lucene.index.IndexReader.ReaderClosedListener, Iterator<List<VocabWord>>
Modifier and Type | Class and Description |
---|---|
static class |
LuceneInvertedIndex.Builder |
Modifier and Type | Field and Description |
---|---|
static String |
INDEX_PATH |
static String |
WORD_FIELD |
Constructor and Description |
---|
LuceneInvertedIndex(VocabCache vocabCache,
boolean cache) |
LuceneInvertedIndex(VocabCache vocabCache,
boolean cache,
String indexPath) |
Modifier and Type | Method and Description |
---|---|
void |
addWordsToDoc(int doc,
List<VocabWord> words)
Adds words to the given document
|
void |
addWordToDoc(int doc,
VocabWord word)
Add word to a document
|
Collection<Integer> |
allDocs()
Returns a list of all documents
|
int |
batchSize()
For word vectors, this is the batch size to train on
|
List<VocabWord> |
document(int index)
Returns a list of words for a document
|
List<Integer> |
documents(VocabWord vocabWord)
Returns the list of documents a vocab word is in
|
void |
finish()
Finishes saving data
|
boolean |
hasNext() |
Iterator<List<VocabWord>> |
miniBatches()
Iterates over mini batches
|
List<VocabWord> |
next() |
int |
numDocuments()
Returns the number of documents
|
void |
onClose(org.apache.lucene.index.IndexReader reader) |
void |
remove() |
double |
sample()
Sampling for creating mini batches
|
int |
totalWords()
Total number of words in the index
|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
forEachRemaining
public static final String WORD_FIELD
public static final String INDEX_PATH
public LuceneInvertedIndex(VocabCache vocabCache, boolean cache)
public LuceneInvertedIndex(VocabCache vocabCache, boolean cache, String indexPath)
public double sample()
InvertedIndex
sample
in interface InvertedIndex
public Iterator<List<VocabWord>> miniBatches()
InvertedIndex
miniBatches
in interface InvertedIndex
public List<VocabWord> document(int index)
InvertedIndex
document
in interface InvertedIndex
public List<Integer> documents(VocabWord vocabWord)
InvertedIndex
documents
in interface InvertedIndex
vocabWord
- the vocab word to get documents for
public int numDocuments()
InvertedIndex
numDocuments
in interface InvertedIndex
public Collection<Integer> allDocs()
InvertedIndex
allDocs
in interface InvertedIndex
public void addWordToDoc(int doc, VocabWord word)
InvertedIndex
addWordToDoc
in interface InvertedIndex
doc
- the document to add to
word
- the word to add
public void addWordsToDoc(int doc, List<VocabWord> words)
InvertedIndex
addWordsToDoc
in interface InvertedIndex
doc
- the document to add to
words
- the words to add
public void finish()
InvertedIndex
finish
in interface InvertedIndex
public int totalWords()
InvertedIndex
totalWords
in interface InvertedIndex
public int batchSize()
InvertedIndex
batchSize
in interface InvertedIndex
public void onClose(org.apache.lucene.index.IndexReader reader)
onClose
in interface org.apache.lucene.index.IndexReader.ReaderClosedListener
Copyright © 2014. All rights reserved.