public class WordVectorSerializer extends Object
Modifier and Type | Class and Description |
---|---|
protected static class |
WordVectorSerializer.BinaryReader |
protected static class |
WordVectorSerializer.CSVReader |
protected static interface |
WordVectorSerializer.Reader |
static class |
WordVectorSerializer.ReadHelper
Helper static methods to read data from input stream.
|
Modifier and Type | Method and Description |
---|---|
static Word2Vec |
fromPair(org.nd4j.common.primitives.Pair<InMemoryLookupTable,VocabCache> pair)
Load word vectors from the given pair
|
static WordVectors |
fromTableAndVocab(WeightLookupTable table,
VocabCache vocab)
Load word vectors for the given vocab and table
|
protected static TokenizerFactory |
getTokenizerFactory(VectorsConfiguration configuration) |
static Word2Vec |
loadFullModel(@NonNull String path)
Deprecated.
Use readWord2VecModel() or loadStaticModel() method instead
|
static WordVectors |
loadStaticModel(@NonNull File file)
This method restores previously saved w2v model.
|
static WordVectors |
loadStaticModel(InputStream inputStream)
This method restores previously saved w2v model.
|
static org.nd4j.common.primitives.Pair<InMemoryLookupTable,VocabCache> |
loadTxt(@NonNull File file) |
static org.nd4j.common.primitives.Pair<InMemoryLookupTable,VocabCache> |
loadTxt(@NonNull InputStream inputStream)
Loads an in memory cache from the given input stream (sets syn0 and the vocab).
|
static WordVectors |
loadTxtVectors(File vectorsFile)
Deprecated.
|
static WordVectors |
loadTxtVectors(@NonNull InputStream stream,
boolean skipFirstLine)
Deprecated.
Use readWord2VecModel() or loadStaticModel() method instead
|
static void |
printOutProjectedMemoryUse(long numWords,
int vectorLength,
int numTables)
This method prints memory usage to log
|
static Word2Vec |
readAsBinary(@NonNull File file) |
static Word2Vec |
readAsBinary(@NonNull InputStream inputStream)
This method loads Word2Vec model from binary input stream.
|
static Word2Vec |
readAsBinaryNoLineBreaks(@NonNull File file) |
static Word2Vec |
readAsBinaryNoLineBreaks(@NonNull InputStream inputStream) |
static Word2Vec |
readAsCsv(@NonNull File file) |
static Word2Vec |
readAsCsv(@NonNull InputStream inputStream)
This method loads Word2Vec model from csv file
|
static Word2Vec |
readBinaryModel(InputStream inputStream,
boolean linebreaks,
boolean normalize)
Read a binary word2vec from input stream.
|
static <T extends SequenceElement> |
readLookupTable(File file) |
static <T extends SequenceElement> |
readLookupTable(InputStream stream) |
static ParagraphVectors |
readParagraphVectors(File file)
This method restores ParagraphVectors model previously saved with writeParagraphVectors()
|
static ParagraphVectors |
readParagraphVectors(InputStream stream)
This method restores ParagraphVectors model previously saved with writeParagraphVectors()
|
static ParagraphVectors |
readParagraphVectors(String path)
This method restores ParagraphVectors model previously saved with writeParagraphVectors()
|
static ParagraphVectors |
readParagraphVectorsFromText(@NonNull File file)
Deprecated.
|
static ParagraphVectors |
readParagraphVectorsFromText(@NonNull InputStream stream)
Deprecated.
|
static ParagraphVectors |
readParagraphVectorsFromText(@NonNull String path)
Deprecated.
|
static <T extends SequenceElement> |
readSequenceVectors(@NonNull File file,
boolean readExtendedTables)
This method loads SequenceVectors from specified file path
|
static <T extends SequenceElement> |
readSequenceVectors(@NonNull InputStream stream,
boolean readExtendedTables)
This method loads SequenceVectors from specified input stream
|
static <T extends SequenceElement> |
readSequenceVectors(@NonNull SequenceElementFactory<T> factory,
@NonNull File file)
This method loads previously saved SequenceVectors model from File
|
static <T extends SequenceElement> |
readSequenceVectors(@NonNull SequenceElementFactory<T> factory,
@NonNull InputStream stream)
This method loads previously saved SequenceVectors model from InputStream
|
static <T extends SequenceElement> |
readSequenceVectors(@NonNull String path,
boolean readExtendedTables)
This method loads SequenceVectors from specified file path
|
static VocabCache<VocabWord> |
readVocabCache(@NonNull File file)
This method reads vocab cache from provided file.
|
static VocabCache<VocabWord> |
readVocabCache(@NonNull InputStream stream)
This method reads vocab cache from provided InputStream.
|
static Word2Vec |
readWord2Vec(File file)
Deprecated.
|
static Word2Vec |
readWord2Vec(@NonNull File file,
boolean readExtendedTables)
This method loads Word2Vec model from file
|
static Word2Vec |
readWord2Vec(@NonNull InputStream stream,
boolean readExtendedTable)
This method loads Word2Vec model from input stream
|
static Word2Vec |
readWord2Vec(@NonNull String path,
boolean readExtendedTables)
This method restores Word2Vec model from file
|
static Word2Vec |
readWord2VecFromText(@NonNull File vectors,
@NonNull File hs,
@NonNull File h_codes,
@NonNull File h_points,
@NonNull VectorsConfiguration configuration)
This method allows you to read a Word2Vec model from externally originated vectors and syn1.
|
static Word2Vec |
readWord2VecModel(File file)
This method loads a Word2Vec model from a file stored in one of the supported formats:
1) Binary model, either compressed or not.
|
static Word2Vec |
readWord2VecModel(File file,
boolean extendedModel)
This method loads a Word2Vec model from a file stored in one of the supported formats:
1) Binary model, either compressed or not.
|
static Word2Vec |
readWord2VecModel(String path)
This method loads a Word2Vec model from the given path, stored in one of the supported formats:
1) Binary model, either compressed or not.
|
static Word2Vec |
readWord2VecModel(String path,
boolean extendedModel)
This method loads a Word2Vec model from the given path, stored in one of the supported formats:
1) Binary model, either compressed or not.
|
static FastText |
readWordVectors(File path)
This method loads FastText model from file
|
static void |
writeFullModel(@NonNull Word2Vec vec,
@NonNull String path)
Deprecated.
Use writeWord2VecModel() method instead
|
static <T extends SequenceElement> |
writeLookupTable(WeightLookupTable<T> weightLookupTable,
@NonNull File file)
This method saves table of weights to file
|
static void |
writeParagraphVectors(ParagraphVectors vectors,
File file)
This method saves ParagraphVectors model into compressed zip file
|
static void |
writeParagraphVectors(ParagraphVectors vectors,
OutputStream stream)
This method saves ParagraphVectors model into compressed zip file and sends it to output stream
|
static void |
writeParagraphVectors(ParagraphVectors vectors,
String path)
This method saves ParagraphVectors model into compressed zip file located at path
|
static <T extends SequenceElement> |
writeSequenceVectors(@NonNull SequenceVectors<T> vectors,
@NonNull OutputStream stream)
This method saves specified SequenceVectors model to target OutputStream
|
static <T extends SequenceElement> |
writeSequenceVectors(@NonNull SequenceVectors<T> vectors,
@NonNull SequenceElementFactory<T> factory,
@NonNull File file)
This method saves specified SequenceVectors model to target file
|
static <T extends SequenceElement> |
writeSequenceVectors(@NonNull SequenceVectors<T> vectors,
@NonNull SequenceElementFactory<T> factory,
@NonNull OutputStream stream)
This method saves specified SequenceVectors model to target OutputStream
|
static <T extends SequenceElement> |
writeSequenceVectors(@NonNull SequenceVectors<T> vectors,
@NonNull SequenceElementFactory<T> factory,
@NonNull String path)
This method saves specified SequenceVectors model to target file path
|
static void |
writeTsneFormat(Word2Vec vec,
org.nd4j.linalg.api.ndarray.INDArray tsne,
File csv)
Write the tsne format
|
static void |
writeVocabCache(@NonNull VocabCache<VocabWord> vocabCache,
@NonNull File file)
This method saves vocab cache to provided File.
|
static void |
writeVocabCache(@NonNull VocabCache<VocabWord> vocabCache,
@NonNull OutputStream stream)
This method saves vocab cache to provided OutputStream.
|
static void |
writeWord2Vec(@NonNull Word2Vec word2Vec,
@NonNull OutputStream stream)
This method saves Word2Vec model to output stream
|
static void |
writeWord2VecModel(Word2Vec vectors,
File file)
This method saves Word2Vec model into compressed zip file
PLEASE NOTE: This method saves FULL model, including syn0 AND syn1
|
static void |
writeWord2VecModel(Word2Vec vectors,
OutputStream stream)
This method saves Word2Vec model into compressed zip file and sends it to output stream
PLEASE NOTE: This method saves FULL model, including syn0 AND syn1
|
static void |
writeWord2VecModel(Word2Vec vectors,
String path)
This method saves Word2Vec model into compressed zip file
PLEASE NOTE: This method saves FULL model, including syn0 AND syn1
|
static void |
writeWordVectors(@NonNull FastText vectors,
@NonNull File path)
This method saves FastText model to file
|
static void |
writeWordVectors(InMemoryLookupTable lookupTable,
InMemoryLookupCache cache,
String path)
Deprecated.
Use
writeWord2VecModel(Word2Vec, File) instead |
static void |
writeWordVectors(@NonNull ParagraphVectors vectors,
@NonNull File path)
Deprecated.
|
static void |
writeWordVectors(ParagraphVectors vectors,
OutputStream stream)
Deprecated.
|
static void |
writeWordVectors(@NonNull ParagraphVectors vectors,
@NonNull String path)
Deprecated.
|
static <T extends SequenceElement> |
writeWordVectors(WeightLookupTable<T> lookupTable,
File file)
This method writes word vectors to the given file.
|
static <T extends SequenceElement> |
writeWordVectors(WeightLookupTable<T> lookupTable,
OutputStream stream)
This method writes word vectors to the given OutputStream.
|
static <T extends SequenceElement> |
writeWordVectors(WeightLookupTable<T> lookupTable,
String path)
This method writes word vectors to the given path.
|
static void |
writeWordVectors(@NonNull Word2Vec vec,
@NonNull BufferedWriter writer)
Deprecated.
|
static void |
writeWordVectors(@NonNull Word2Vec vec,
@NonNull File file)
Deprecated.
|
static void |
writeWordVectors(@NonNull Word2Vec vec,
@NonNull OutputStream outputStream)
Deprecated.
|
static void |
writeWordVectors(@NonNull Word2Vec vec,
@NonNull String path)
Deprecated.
|
public static Word2Vec readBinaryModel(InputStream inputStream, boolean linebreaks, boolean normalize) throws NumberFormatException, IOException
inputStream
- input stream to read
linebreaks
- if true, the reader expects each word/vector to be in a separate line, terminated
by a line break
normalize
- model
NumberFormatException
IOException
FileNotFoundException
public static <T extends SequenceElement> void writeWordVectors(WeightLookupTable<T> lookupTable, String path) throws IOException
T
- lookupTable
- path
- IOException
public static <T extends SequenceElement> void writeWordVectors(WeightLookupTable<T> lookupTable, File file) throws IOException
T
- lookupTable
- file
- IOException
public static <T extends SequenceElement> void writeWordVectors(WeightLookupTable<T> lookupTable, OutputStream stream) throws IOException
T
- lookupTable
- stream
- IOException
@Deprecated public static void writeWordVectors(@NonNull @NonNull ParagraphVectors vectors, @NonNull @NonNull File path)
writeParagraphVectors(ParagraphVectors, File)
@Deprecated public static void writeWordVectors(@NonNull @NonNull ParagraphVectors vectors, @NonNull @NonNull String path)
writeParagraphVectors(ParagraphVectors, String)
public static void writeParagraphVectors(ParagraphVectors vectors, File file)
file
- public static void writeParagraphVectors(ParagraphVectors vectors, String path)
path
- public static void writeWord2VecModel(Word2Vec vectors, File file)
public static void writeWord2VecModel(Word2Vec vectors, String path)
public static void writeWord2VecModel(Word2Vec vectors, OutputStream stream) throws IOException
IOException
public static void writeParagraphVectors(ParagraphVectors vectors, OutputStream stream) throws IOException
IOException
public static ParagraphVectors readParagraphVectors(String path) throws IOException
IOException
public static ParagraphVectors readParagraphVectors(File file) throws IOException
IOException
@Deprecated public static Word2Vec readWord2Vec(File file) throws IOException
readWord2Vec(File, boolean)
PLEASE NOTE: This method loads FULL model, so don't use it if you're only going to use weights.
file
- IOException
public static ParagraphVectors readParagraphVectors(InputStream stream) throws IOException
IOException
public static Word2Vec readWord2VecFromText(@NonNull @NonNull File vectors, @NonNull @NonNull File hs, @NonNull @NonNull File h_codes, @NonNull @NonNull File h_points, @NonNull @NonNull VectorsConfiguration configuration) throws IOException
vectors
- text file with words and their weights, aka Syn0
hs
- text file with HS layers, aka Syn1
h_codes
- text file with Huffman tree codes
h_points
- text file with Huffman tree points
IOException
@Deprecated public static ParagraphVectors readParagraphVectorsFromText(@NonNull @NonNull String path)
readParagraphVectors(String)
Deprecation note: Please, consider using readParagraphVectors() method instead
path
- Path to file that contains previously serialized model
@Deprecated public static ParagraphVectors readParagraphVectorsFromText(@NonNull @NonNull File file)
readParagraphVectors(File)
Deprecation note: Please, consider using readParagraphVectors() method instead
file
- File that contains previously serialized model
@Deprecated public static ParagraphVectors readParagraphVectorsFromText(@NonNull @NonNull InputStream stream)
readParagraphVectors(InputStream)
Deprecation note: Please, consider using readParagraphVectors() method instead
stream
- InputStream that contains previously serialized model
@Deprecated public static void writeWordVectors(ParagraphVectors vectors, OutputStream stream)
writeParagraphVectors(ParagraphVectors, OutputStream)
@Deprecated public static void writeWordVectors(InMemoryLookupTable lookupTable, InMemoryLookupCache cache, String path) throws IOException
writeWord2VecModel(Word2Vec, File)
instead
lookupTable
- cache
- path
- the path to write
IOException
@Deprecated public static void writeFullModel(@NonNull @NonNull Word2Vec vec, @NonNull @NonNull String path)
Deprecation note: Please, consider using writeWord2VecModel() method instead
vec
- The Word2Vec instance to be saved
path
- the path for json to be saved
@Deprecated public static Word2Vec loadFullModel(@NonNull @NonNull String path) throws FileNotFoundException
Deprecation note: Please, consider using readWord2VecModel() or loadStaticModel() method instead
path
- path to previously stored w2v json model
FileNotFoundException
@Deprecated public static void writeWordVectors(@NonNull @NonNull Word2Vec vec, @NonNull @NonNull String path) throws IOException
writeWord2VecModel(Word2Vec, String)
vec
- the word2vec to writepath
- the path to writeIOException
@Deprecated public static void writeWordVectors(@NonNull @NonNull Word2Vec vec, @NonNull @NonNull File file) throws IOException
writeWord2VecModel(Word2Vec, File)
vec
- the word2vec to writefile
- the file to writeIOException
@Deprecated public static void writeWordVectors(@NonNull @NonNull Word2Vec vec, @NonNull @NonNull OutputStream outputStream) throws IOException
writeWord2Vec(Word2Vec, OutputStream)
vec
- the word2vec to write
outputStream
- OutputStream, where all data should be sent to
IOException
@Deprecated public static void writeWordVectors(@NonNull @NonNull Word2Vec vec, @NonNull @NonNull BufferedWriter writer) throws IOException
writeWord2Vec(Word2Vec, OutputStream)
vec
- the word2vec to write
writer
- BufferedWriter, where all data should be written to
IOException
public static WordVectors fromTableAndVocab(WeightLookupTable table, VocabCache vocab)
table
- the weights to use
vocab
- the vocab to use
public static Word2Vec fromPair(org.nd4j.common.primitives.Pair<InMemoryLookupTable,VocabCache> pair)
pair
- the given pair
@Deprecated public static WordVectors loadTxtVectors(File vectorsFile) throws IOException
loadTxt(InputStream)
Deprecation note: Please, consider using readWord2VecModel() or loadStaticModel() method instead
vectorsFile
- the path of the file to load
FileNotFoundException
- if the file does not exist
IOException
public static org.nd4j.common.primitives.Pair<InMemoryLookupTable,VocabCache> loadTxt(@NonNull @NonNull File file)
public static org.nd4j.common.primitives.Pair<InMemoryLookupTable,VocabCache> loadTxt(@NonNull @NonNull InputStream inputStream)
inputStream
- input stream
Pair holding the lookup table and the vocab cache.
@Deprecated public static WordVectors loadTxtVectors(@NonNull @NonNull InputStream stream, boolean skipFirstLine) throws IOException
Deprecation note: Please, consider using readWord2VecModel() or loadStaticModel() method instead
stream
- InputStream that contains previously serialized model
skipFirstLine
- Set this TRUE if first line contains csv header, FALSE otherwise
IOException
public static void writeTsneFormat(Word2Vec vec, org.nd4j.linalg.api.ndarray.INDArray tsne, File csv) throws Exception
vec
- the word vectors to use for labeling
tsne
- the tsne array to write
csv
- the file to use
Exception
public static <T extends SequenceElement> void writeSequenceVectors(@NonNull @NonNull SequenceVectors<T> vectors, @NonNull @NonNull SequenceElementFactory<T> factory, @NonNull @NonNull String path) throws IOException
T
- vectors
- SequenceVectors modelfactory
- SequenceElementFactory implementation for your objectspath
- Target output file pathIOException
public static <T extends SequenceElement> void writeSequenceVectors(@NonNull @NonNull SequenceVectors<T> vectors, @NonNull @NonNull SequenceElementFactory<T> factory, @NonNull @NonNull File file) throws IOException
T
- vectors
- SequenceVectors modelfactory
- SequenceElementFactory implementation for your objectsfile
- Target output fileIOException
public static <T extends SequenceElement> void writeSequenceVectors(@NonNull @NonNull SequenceVectors<T> vectors, @NonNull @NonNull SequenceElementFactory<T> factory, @NonNull @NonNull OutputStream stream) throws IOException
T
- vectors
- SequenceVectors modelfactory
- SequenceElementFactory implementation for your objectsstream
- Target output streamIOException
public static <T extends SequenceElement> void writeSequenceVectors(@NonNull @NonNull SequenceVectors<T> vectors, @NonNull @NonNull OutputStream stream) throws IOException
T
- vectors
- SequenceVectors modelstream
- Target output streamIOException
public static <T extends SequenceElement> SequenceVectors<T> readSequenceVectors(@NonNull @NonNull String path, boolean readExtendedTables) throws IOException
T
- path
- StringreadExtendedTables
- booleanIOException
public static <T extends SequenceElement> SequenceVectors<T> readSequenceVectors(@NonNull @NonNull File file, boolean readExtendedTables) throws IOException
T
- file
- FilereadExtendedTables
- booleanIOException
public static <T extends SequenceElement> SequenceVectors<T> readSequenceVectors(@NonNull @NonNull InputStream stream, boolean readExtendedTables) throws IOException
T
- stream
- InputStreamreadExtendedTables
- booleanIOException
public static <T extends SequenceElement> SequenceVectors<T> readSequenceVectors(@NonNull @NonNull SequenceElementFactory<T> factory, @NonNull @NonNull File file) throws IOException
T
- factory
- file
- IOException
public static <T extends SequenceElement> SequenceVectors<T> readSequenceVectors(@NonNull @NonNull SequenceElementFactory<T> factory, @NonNull @NonNull InputStream stream) throws IOException
T
- factory
- stream
- IOException
public static void writeVocabCache(@NonNull @NonNull VocabCache<VocabWord> vocabCache, @NonNull @NonNull File file) throws IOException
vocabCache
- file
- UnsupportedEncodingException
IOException
public static void writeVocabCache(@NonNull @NonNull VocabCache<VocabWord> vocabCache, @NonNull @NonNull OutputStream stream) throws IOException
vocabCache
- stream
- UnsupportedEncodingException
IOException
public static VocabCache<VocabWord> readVocabCache(@NonNull @NonNull File file) throws IOException
file
- IOException
public static VocabCache<VocabWord> readVocabCache(@NonNull @NonNull InputStream stream) throws IOException
stream
- IOException
public static Word2Vec readWord2VecModel(String path)
Please note: Only weights will be loaded by this method.
path
- public static Word2Vec readWord2VecModel(String path, boolean extendedModel)
Please note: Only weights will be loaded by this method.
path
- path to model file
extendedModel
- if TRUE, we'll try to load HS states & Huffman tree info, if FALSE, only weights will be loaded
public static Word2Vec readWord2VecModel(File file)
Please note: Only weights will be loaded by this method.
file
- public static Word2Vec readWord2VecModel(File file, boolean extendedModel)
Please note: if extended data isn't available, only weights will be loaded instead.
file
- model file
extendedModel
- if TRUE, we'll try to load HS states & Huffman tree info, if FALSE, only weights will be loaded
public static Word2Vec readAsBinaryNoLineBreaks(@NonNull @NonNull File file)
public static Word2Vec readAsBinaryNoLineBreaks(@NonNull @NonNull InputStream inputStream)
public static Word2Vec readAsBinary(@NonNull @NonNull InputStream inputStream)
inputStream
- binary input streampublic static Word2Vec readAsCsv(@NonNull @NonNull InputStream inputStream)
inputStream
- input streamprotected static TokenizerFactory getTokenizerFactory(VectorsConfiguration configuration)
public static WordVectors loadStaticModel(InputStream inputStream) throws IOException
inputStream
- InputStream should point to previously saved w2v modelIOException
public static WordVectors loadStaticModel(@NonNull @NonNull File file)
file
- Filepublic static void writeWord2Vec(@NonNull @NonNull Word2Vec word2Vec, @NonNull @NonNull OutputStream stream) throws IOException
word2Vec
- Word2Vecstream
- OutputStreamIOException
public static Word2Vec readWord2Vec(@NonNull @NonNull String path, boolean readExtendedTables)
path
- readExtendedTables
- public static <T extends SequenceElement> void writeLookupTable(WeightLookupTable<T> weightLookupTable, @NonNull @NonNull File file) throws IOException
weightLookupTable
- WeightLookupTablefile
- FileIOException
public static <T extends SequenceElement> WeightLookupTable<T> readLookupTable(File file) throws IOException
IOException
public static <T extends SequenceElement> WeightLookupTable<T> readLookupTable(InputStream stream) throws IOException
IOException
public static Word2Vec readWord2Vec(@NonNull @NonNull File file, boolean readExtendedTables)
file
- FilereadExtendedTables
- booleanpublic static Word2Vec readWord2Vec(@NonNull @NonNull InputStream stream, boolean readExtendedTable) throws IOException
stream
- InputStreamreadExtendedTable
- booleanIOException
public static void writeWordVectors(@NonNull @NonNull FastText vectors, @NonNull @NonNull File path) throws IOException
vectors
- FastTextpath
- FileIOException
public static FastText readWordVectors(File path)
path
- Filepublic static void printOutProjectedMemoryUse(long numWords, int vectorLength, int numTables)
numWords
- vectorLength
- numTables
- Copyright © 2022. All rights reserved.