DatastoreGoogleNGram

A class that parses Google N-Gram data (http://commondatastorage.googleapis.com/books/syntactic-ngrams/index.html) to provide information about a requested n-gram. It takes the datastore location details for a data directory and parses each file in it. Each file is expected to be in the following format (from https://docs.google.com/document/d/14PWeoTkrnKk9H8_7CfVbdvuoFZ7jYivNTkBX2Hj7qLw/edit):

head_word<TAB>syntactic-ngram<TAB>total_count<TAB>counts_by_year

The counts_by_year format is a tab-separated list of year<comma>count items. Years are sorted in ascending order, and only years with non-zero counts are included.

The syntactic-ngram format is a space-separated list of tokens; each token has the format “word/pos-tag/dep-label/head-index”. The word field can contain any non-whitespace character. The other fields can contain any non-whitespace character except for ‘/’. pos-tag is a Penn Treebank part-of-speech tag. dep-label is a Stanford basic dependencies label. head-index is an integer pointing to the head of the current token: “1” refers to the first token in the list, “2” to the second, and “0” indicates that the head is the root of the fragment.

Linear Supertypes

Serializable (scala), Serializable (java.io), Product, Equals, AnyRef, Any

Instance Constructors

new DatastoreGoogleNGram(groupName: String, artifactName: String, version: Int, frequencyCutoff: Int)

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
val artifactName: String
final def asInstanceOf[T0]: T0

Definition Classes
Any
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
val frequencyCutoff: Int
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
val googleNgramDir: File
val googleNgramPath: Path
val groupName: String
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
val ngramMap: Map[String, Seq[NgramInfo]]
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
val version: Int
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )

Related Docs: object DatastoreGoogleNGram | package ml

case class DatastoreGoogleNGram(groupName: String, artifactName: String, version: Int, frequencyCutoff: Int) extends Product with Serializable

Instance Constructors

new DatastoreGoogleNGram(groupName: String, artifactName: String, version: Int, frequencyCutoff: Int)

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: Any): Boolean

val artifactName: String

final def asInstanceOf[T0]: T0

def clone(): AnyRef

final def eq(arg0: AnyRef): Boolean

def finalize(): Unit

val frequencyCutoff: Int

final def getClass(): Class[_]

val googleNgramDir: File

val googleNgramPath: Path

val groupName: String

final def isInstanceOf[T0]: Boolean

final def ne(arg0: AnyRef): Boolean

val ngramMap: Map[String, Seq[NgramInfo]]

final def notify(): Unit

final def notifyAll(): Unit

final def synchronized[T0](arg0: ⇒ T0): T0

val version: Int

final def wait(): Unit

final def wait(arg0: Long, arg1: Int): Unit

final def wait(arg0: Long): Unit

Inherited from Serializable

Inherited from Serializable

Inherited from Product

Inherited from Equals

Inherited from AnyRef

Inherited from Any

Ungrouped