public class WordVectorSerializer extends Object
Modifier and Type | Method and Description |
---|---|
static WordVectors |
fromPair(Pair<InMemoryLookupTable,VocabCache> pair)
Load word vectors from the given pair
|
static WordVectors |
fromTableAndVocab(WeightLookupTable table,
VocabCache vocab)
Load word vectors for the given vocab and table
|
static float |
getFloat(byte[] b)
Read a float from a byte array. Credit to:
https://github.com/NLPchina/Word2VEC_java/blob/master/src/com/ansj/vec/Word2VEC.java
|
static Word2Vec |
loadFullModel(String path)
This method loads the full w2v model previously saved with a writeFullModel call
|
static WordVectors |
loadGoogleModel(File modelFile,
boolean binary)
Loads the google model
|
static WordVectors |
loadGoogleModel(File modelFile,
boolean binary,
boolean lineBreaks)
Loads the Google model.
|
static WordVectors |
loadGoogleModelNonNormalized(File modelFile,
boolean binary,
boolean lineBreaks)
Loads the Google model without normalization being applied.
|
static Pair<InMemoryLookupTable,VocabCache> |
loadTxt(File vectorsFile)
Loads an in memory cache from the given path (sets syn0 and the vocab)
|
static WordVectors |
loadTxtVectors(File vectorsFile)
Loads an in memory cache from the given path (sets syn0 and the vocab)
|
static WordVectors |
loadTxtVectors(InputStream stream,
boolean skipFirstLine)
This method can be used to load previously saved model from InputStream (like a HDFS-stream)
|
static float |
readFloat(InputStream is)
Read a float from a data input stream Credit to:
https://github.com/NLPchina/Word2VEC_java/blob/master/src/com/ansj/vec/Word2VEC.java
|
static ParagraphVectors |
readParagraphVectorsFromText(File file)
Restores previously serialized ParagraphVectors model
|
static ParagraphVectors |
readParagraphVectorsFromText(InputStream stream)
Restores previously serialized ParagraphVectors model
|
static ParagraphVectors |
readParagraphVectorsFromText(String path)
Restores previously serialized ParagraphVectors model
|
static <T extends SequenceElement> |
readSequenceVectors(SequenceElementFactory<T> factory,
File file)
This method loads previously saved SequenceVectors model from File
|
static <T extends SequenceElement> |
readSequenceVectors(SequenceElementFactory<T> factory,
InputStream stream)
This method loads previously saved SequenceVectors model from InputStream
|
static String |
readString(DataInputStream dis)
Read a string from a data input stream Credit to:
https://github.com/NLPchina/Word2VEC_java/blob/master/src/com/ansj/vec/Word2VEC.java
|
static VocabCache<VocabWord> |
readVocabCache(File file)
This method reads vocab cache from provided file.
|
static VocabCache<VocabWord> |
readVocabCache(InputStream stream)
This method reads vocab cache from provided InputStream.
|
static void |
writeFullModel(Word2Vec vec,
String path)
Saves the full Word2Vec model in a way that allows model updates without rebuilding it from scratch
|
static <T extends SequenceElement> |
writeSequenceVectors(SequenceVectors<T> vectors,
SequenceElementFactory<T> factory,
File file)
This method saves specified SequenceVectors model to target file
|
static <T extends SequenceElement> |
writeSequenceVectors(SequenceVectors<T> vectors,
SequenceElementFactory<T> factory,
OutputStream stream)
This method saves specified SequenceVectors model to target OutputStream
|
static <T extends SequenceElement> |
writeSequenceVectors(SequenceVectors<T> vectors,
SequenceElementFactory<T> factory,
String path)
This method saves specified SequenceVectors model to target file path
|
static void |
writeTsneFormat(Glove vec,
org.nd4j.linalg.api.ndarray.INDArray tsne,
File csv)
Write the tsne format
|
static void |
writeTsneFormat(Word2Vec vec,
org.nd4j.linalg.api.ndarray.INDArray tsne,
File csv)
Write the tsne format
|
static void |
writeVocabCache(VocabCache<VocabWord> vocabCache,
File file)
This method saves vocab cache to provided File.
|
static void |
writeVocabCache(VocabCache<VocabWord> vocabCache,
OutputStream stream)
This method saves vocab cache to provided OutputStream.
|
static void |
writeWordVectors(Glove vectors,
File file)
This method saves GloVe model to the given file.
|
static void |
writeWordVectors(Glove vectors,
OutputStream stream)
This method saves GloVe model to the given OutputStream
|
static void |
writeWordVectors(Glove vectors,
String path)
This method saves GloVe model to the given path.
|
static void |
writeWordVectors(InMemoryLookupTable lookupTable,
InMemoryLookupCache cache,
String path)
Writes the word vectors to the given path.
|
static void |
writeWordVectors(ParagraphVectors vectors,
File path)
This method saves paragraph vectors to the given file.
|
static void |
writeWordVectors(ParagraphVectors vectors,
OutputStream stream)
This method saves paragraph vectors to the given output stream.
|
static void |
writeWordVectors(ParagraphVectors vectors,
String path)
This method saves paragraph vectors to the given path.
|
static <T extends SequenceElement> |
writeWordVectors(WeightLookupTable<T> lookupTable,
File file)
This method writes word vectors to the given file.
|
static <T extends SequenceElement> |
writeWordVectors(WeightLookupTable<T> lookupTable,
OutputStream stream)
This method writes word vectors to the given OutputStream.
|
static <T extends SequenceElement> |
writeWordVectors(WeightLookupTable<T> lookupTable,
String path)
This method writes word vectors to the given path.
|
static void |
writeWordVectors(Word2Vec vec,
BufferedWriter writer)
Writes the word vectors to the given BufferedWriter.
|
static void |
writeWordVectors(Word2Vec vec,
File file)
Writes the word vectors to the given file.
|
static void |
writeWordVectors(Word2Vec vec,
OutputStream outputStream)
Writes the word vectors to the given OutputStream.
|
static void |
writeWordVectors(Word2Vec vec,
String path)
Writes the word vectors to the given path.
|
public static WordVectors loadGoogleModel(File modelFile, boolean binary) throws IOException
modelFile
- the path to the google model
binary
- read from binary file format (if set to true) or from text file format.
IOException
public static WordVectors loadGoogleModel(File modelFile, boolean binary, boolean lineBreaks) throws IOException
modelFile
- the input file
binary
- read from binary or text file format
lineBreaks
- if true, the input file is expected to terminate each line with a line break. This
is typically the case for files created with recent versions of Word2Vec, but not
for the downloadable model files.
Word2Vec object
IOException
public static WordVectors loadGoogleModelNonNormalized(File modelFile, boolean binary, boolean lineBreaks) throws IOException
modelFile
- binary
- lineBreaks
- IOException
public static float readFloat(InputStream is) throws IOException
is
- IOException
public static float getFloat(byte[] b)
b
- IOException
public static String readString(DataInputStream dis) throws IOException
dis
- IOException
public static <T extends SequenceElement> void writeWordVectors(WeightLookupTable<T> lookupTable, String path) throws IOException
T
- lookupTable
- path
- IOException
public static <T extends SequenceElement> void writeWordVectors(WeightLookupTable<T> lookupTable, File file) throws IOException
T
- lookupTable
- file
- IOException
public static <T extends SequenceElement> void writeWordVectors(WeightLookupTable<T> lookupTable, OutputStream stream) throws IOException
T
- lookupTable
- stream
- IOException
public static void writeWordVectors(@NonNull ParagraphVectors vectors, @NonNull File path)
vectors
- path
- public static void writeWordVectors(@NonNull ParagraphVectors vectors, @NonNull String path)
vectors
- path
- public static ParagraphVectors readParagraphVectorsFromText(@NonNull String path)
path
- Path to file that contains previously serialized model
public static ParagraphVectors readParagraphVectorsFromText(@NonNull File file)
file
- File that contains previously serialized model
public static ParagraphVectors readParagraphVectorsFromText(@NonNull InputStream stream)
stream
- InputStream that contains previously serialized model
public static void writeWordVectors(@NonNull Glove vectors, @NonNull File file)
vectors
- GloVe model to be saved
file
- path where model should be saved to
public static void writeWordVectors(@NonNull Glove vectors, @NonNull String path)
vectors
- GloVe model to be saved
path
- path where model should be saved to
public static void writeWordVectors(@NonNull Glove vectors, @NonNull OutputStream stream)
vectors
- GloVe model to be saved
stream
- OutputStream where model should be saved to
public static void writeWordVectors(ParagraphVectors vectors, OutputStream stream)
vectors
- stream
- public static void writeWordVectors(InMemoryLookupTable lookupTable, InMemoryLookupCache cache, String path) throws IOException
lookupTable
- cache
- path
- the path to writeIOException
public static void writeFullModel(@NonNull Word2Vec vec, @NonNull String path)
vec
- The Word2Vec instance to be saved
path
- the path for json to be saved
public static Word2Vec loadFullModel(@NonNull String path) throws FileNotFoundException
path
- path to previously stored w2v json model
FileNotFoundException
public static void writeWordVectors(@NonNull Word2Vec vec, @NonNull String path) throws IOException
vec
- the word2vec to writepath
- the path to writeIOException
public static void writeWordVectors(@NonNull Word2Vec vec, @NonNull File file) throws IOException
vec
- the word2vec to writefile
- the file to writeIOException
public static void writeWordVectors(@NonNull Word2Vec vec, @NonNull OutputStream outputStream) throws IOException
vec
- the word2vec to writeoutputStream
- - OutputStream, where all data should be sent to
the path to writeIOException
public static void writeWordVectors(@NonNull Word2Vec vec, @NonNull BufferedWriter writer) throws IOException
vec
- the word2vec to writewriter
- - BufferedWriter, where all data should be written to
the path to writeIOException
public static WordVectors fromTableAndVocab(WeightLookupTable table, VocabCache vocab)
table
- the weights to use
vocab
- the vocab to use
public static WordVectors fromPair(Pair<InMemoryLookupTable,VocabCache> pair)
pair
- the given pair
public static WordVectors loadTxtVectors(File vectorsFile) throws FileNotFoundException, UnsupportedEncodingException
vectorsFile
- the path of the file to load
FileNotFoundException
- if the file does not exist
UnsupportedEncodingException
public static Pair<InMemoryLookupTable,VocabCache> loadTxt(File vectorsFile) throws FileNotFoundException, UnsupportedEncodingException
vectorsFile
- the path of the file to loadFileNotFoundException
- if the input file does not existUnsupportedEncodingException
public static WordVectors loadTxtVectors(@NonNull InputStream stream, boolean skipFirstLine) throws IOException
stream
- InputStream that contains previously serialized model
skipFirstLine
- Set this TRUE if first line contains csv header, FALSE otherwise
IOException
public static void writeTsneFormat(Glove vec, org.nd4j.linalg.api.ndarray.INDArray tsne, File csv) throws Exception
vec
- the word vectors to use for labelingtsne
- the tsne array to writecsv
- the file to useException
public static void writeTsneFormat(Word2Vec vec, org.nd4j.linalg.api.ndarray.INDArray tsne, File csv) throws Exception
vec
- the word vectors to use for labelingtsne
- the tsne array to writecsv
- the file to useException
public static <T extends SequenceElement> void writeSequenceVectors(@NonNull SequenceVectors<T> vectors, @NonNull SequenceElementFactory<T> factory, @NonNull String path) throws IOException
T
- vectors
- SequenceVectors modelfactory
- SequenceElementFactory implementation for your objectspath
- Target output file pathIOException
public static <T extends SequenceElement> void writeSequenceVectors(@NonNull SequenceVectors<T> vectors, @NonNull SequenceElementFactory<T> factory, @NonNull File file) throws IOException
T
- vectors
- SequenceVectors modelfactory
- SequenceElementFactory implementation for your objectsfile
- Target output fileIOException
public static <T extends SequenceElement> void writeSequenceVectors(@NonNull SequenceVectors<T> vectors, @NonNull SequenceElementFactory<T> factory, @NonNull OutputStream stream) throws IOException
T
- vectors
- SequenceVectors modelfactory
- SequenceElementFactory implementation for your objectsstream
- Target output streamIOException
public static <T extends SequenceElement> SequenceVectors<T> readSequenceVectors(@NonNull SequenceElementFactory<T> factory, @NonNull File file) throws IOException
T
- factory
- file
- IOException
public static <T extends SequenceElement> SequenceVectors<T> readSequenceVectors(@NonNull SequenceElementFactory<T> factory, @NonNull InputStream stream) throws IOException
T
- factory
- stream
- IOException
public static void writeVocabCache(@NonNull VocabCache<VocabWord> vocabCache, @NonNull File file) throws IOException
vocabCache
- file
- UnsupportedEncodingException
IOException
public static void writeVocabCache(@NonNull VocabCache<VocabWord> vocabCache, @NonNull OutputStream stream) throws IOException
vocabCache
- stream
- UnsupportedEncodingException
IOException
public static VocabCache<VocabWord> readVocabCache(@NonNull File file) throws IOException
file
- IOException
public static VocabCache<VocabWord> readVocabCache(@NonNull InputStream stream) throws IOException
stream
- IOException
Copyright © 2016. All Rights Reserved.