NerDLApproach

Companion object NerDLApproach

class NerDLApproach extends AnnotatorApproach[NerDLModel] with NerApproach[NerDLApproach] with Logging with ParamsAndFeaturesWritable

This Named Entity Recognition annotator allows training a generic NER model based on neural networks. Its training data (train_ner) is either a labeled or an external CoNLL 2003 IOB based Spark dataset with Annotations columns. The user also has to provide a word embeddings annotation column. The neural network architecture is Char CNNs - BiLSTM - CRF, which achieves state-of-the-art results on most datasets.

See https://github.com/JohnSnowLabs/spark-nlp/tree/master/src/test/scala/com/johnsnowlabs/nlp/annotators/ner/dl for further reference on how to use this API.

Linear Supertypes

ParamsAndFeaturesWritable, HasFeatures, Logging, NerApproach[NerDLApproach], AnnotatorApproach[NerDLModel], CanBeLazy, DefaultParamsWritable, MLWritable, HasOutputAnnotatorType, HasOutputAnnotationCol, HasInputAnnotationCols, Estimator[NerDLModel], PipelineStage, Logging, Params, Serializable, Serializable, Identifiable, AnyRef, Any

Ordering

Grouped
Alphabetic
By Inheritance

Inherited

NerDLApproach
ParamsAndFeaturesWritable
HasFeatures
Logging
NerApproach
AnnotatorApproach
CanBeLazy
DefaultParamsWritable
MLWritable
HasOutputAnnotatorType
HasOutputAnnotationCol
HasInputAnnotationCols
Estimator
PipelineStage
Logging
Params
Serializable
Serializable
Identifiable
AnyRef
Any

Hide All
Show All

Visibility

Public
All

Instance Constructors

new NerDLApproach()
new NerDLApproach(uid: String)

Type Members

type AnnotatorType = String

Definition Classes
HasOutputAnnotatorType

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def $[T](param: Param[T]): T

Attributes
protected
Definition Classes
Params
def $$[T](feature: StructFeature[T]): T

Attributes
protected
Definition Classes
HasFeatures
def $$[K, V](feature: MapFeature[K, V]): Map[K, V]

Attributes
protected
Definition Classes
HasFeatures
def $$[T](feature: SetFeature[T]): Set[T]

Attributes
protected
Definition Classes
HasFeatures
def $$[T](feature: ArrayFeature[T]): Array[T]

Attributes
protected
Definition Classes
HasFeatures
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def _fit(dataset: Dataset[_], recursiveStages: Option[PipelineModel]): NerDLModel

Attributes
protected
Definition Classes
AnnotatorApproach
final def asInstanceOf[T0]: T0

Definition Classes
Any
val batchSize: IntParam
Batch size
def beforeTraining(spark: SparkSession): Unit

Definition Classes
NerDLApproach → AnnotatorApproach
def calculateEmbeddingsDim(sentences: Seq[WordpieceEmbeddingsSentence]): Int
final def checkSchema(schema: StructType, inputAnnotatorType: String): Boolean

Attributes
protected
Definition Classes
HasInputAnnotationCols
final def clear(param: Param[_]): NerDLApproach.this.type

Definition Classes
Params
def clone(): AnyRef

Attributes
protected[lang]
Definition Classes
AnyRef
Annotations
@throws( ... ) @native()
val configProtoBytes: IntArrayParam
ConfigProto from tensorflow, serialized into byte array.
ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()
final def copy(extra: ParamMap): Estimator[NerDLModel]

Definition Classes
AnnotatorApproach → Estimator → PipelineStage → Params
def copyValues[T <: Params](to: T, extra: ParamMap): T

Attributes
protected
Definition Classes
Params
final def defaultCopy[T <: Params](extra: ParamMap): T

Attributes
protected
Definition Classes
Params
val description: String
Trains Tensorflow based Char-CNN-BLSTM model
Trains Tensorflow based Char-CNN-BLSTM model

Definition Classes
NerDLApproach → AnnotatorApproach
val dropout: FloatParam
Dropout coefficient
val enableMemoryOptimizer: BooleanParam
val enableOutputLogs: BooleanParam
Whether to output to annotators log folder
val entities: StringArrayParam
Entities to recognize
Entities to recognize

Definition Classes
NerApproach
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
val evaluationLogExtended: BooleanParam
Whether to extend the validation logs: when enabled, it displays time and evaluation of each label.
Whether to extend the validation logs: when enabled, it displays time and evaluation of each label. Default is false.
def explainParam(param: Param[_]): String

Definition Classes
Params
def explainParams(): String

Definition Classes
Params
final def extractParamMap(): ParamMap

Definition Classes
Params
final def extractParamMap(extra: ParamMap): ParamMap

Definition Classes
Params
val features: ArrayBuffer[Feature[_, _, _]]

Definition Classes
HasFeatures
def finalize(): Unit

Attributes
protected[lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def fit(dataset: Dataset[_]): NerDLModel

Definition Classes
AnnotatorApproach → Estimator
def fit(dataset: Dataset[_], paramMaps: Array[ParamMap]): Seq[NerDLModel]

Definition Classes
Estimator
Annotations
@Since( "2.0.0" )
def fit(dataset: Dataset[_], paramMap: ParamMap): NerDLModel

Definition Classes
Estimator
Annotations
@Since( "2.0.0" )
def fit(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): NerDLModel

Definition Classes
Estimator
Annotations
@Since( "2.0.0" ) @varargs()
def get[T](feature: StructFeature[T]): Option[T]

Attributes
protected
Definition Classes
HasFeatures
def get[K, V](feature: MapFeature[K, V]): Option[Map[K, V]]

Attributes
protected
Definition Classes
HasFeatures
def get[T](feature: SetFeature[T]): Option[Set[T]]

Attributes
protected
Definition Classes
HasFeatures
def get[T](feature: ArrayFeature[T]): Option[Array[T]]

Attributes
protected
Definition Classes
HasFeatures
final def get[T](param: Param[T]): Option[T]

Definition Classes
Params
def getBatchSize: Int
Batch size
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
Annotations
@native()
def getConfigProtoBytes: Option[Array[Byte]]
ConfigProto from tensorflow, serialized into byte array.
ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()
def getDataSetParams(dsIt: Iterator[Array[(TextSentenceLabels, WordpieceEmbeddingsSentence)]]): (Set[String], Set[Char], Int, Long)
final def getDefault[T](param: Param[T]): Option[T]

Definition Classes
Params
def getDropout: Float
Dropout coefficient
def getEnableMemoryOptimizer: Boolean
Memory Optimizer
def getEnableOutputLogs: Boolean
Whether to output to annotators log folder
def getIncludeConfidence: Boolean
whether to include confidence scores in annotation metadata
def getInputCols: Array[String]
returns
input annotations columns currently used

Definition Classes
HasInputAnnotationCols
def getIteratorFunc(dataset: Dataset[Row]): () ⇒ Iterator[Array[(TextSentenceLabels, WordpieceEmbeddingsSentence)]]
def getLazyAnnotator: Boolean

Definition Classes
CanBeLazy
def getLogName: String

Definition Classes
NerDLApproach → Logging
def getLr: Float
Learning Rate
def getMaxEpochs: Int
Maximum number of epochs to train
Maximum number of epochs to train

Definition Classes
NerApproach
def getMinEpochs: Int
Minimum number of epochs to train
Minimum number of epochs to train

Definition Classes
NerApproach
final def getOrDefault[T](param: Param[T]): T

Definition Classes
Params
final def getOutputCol: String
Gets the name of the annotation column that will be generated
Gets the name of the annotation column that will be generated

Definition Classes
HasOutputAnnotationCol
def getOutputLogsPath: String
def getParam(paramName: String): Param[Any]

Definition Classes
Params
def getPo: Float
Learning rate decay coefficient.
Learning rate decay coefficient. Real Learning Rate = lr / (1 + po * epoch)
def getRandomSeed: Int
Random seed
Random seed

Definition Classes
NerApproach
def getUseContrib: Boolean
Whether to use contrib LSTM Cells.
Whether to use contrib LSTM Cells. Not compatible with Windows. Might slightly improve accuracy.
def getValidationSplit: Float
Choose the proportion of training dataset to be validated against the model on each Epoch.
Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off.
def getVerbose: Int
Level of verbosity during training
Level of verbosity during training

Definition Classes
NerApproach
val graphFolder: Param[String]
Folder path that contains external graph files
final def hasDefault[T](param: Param[T]): Boolean

Definition Classes
Params
def hasParam(paramName: String): Boolean

Definition Classes
Params
def hashCode(): Int

Definition Classes
AnyRef → Any
Annotations
@native()
val includeConfidence: BooleanParam
Whether to include confidence scores in annotation metadata
def initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean

Attributes
protected
Definition Classes
Logging
def initializeLogIfNecessary(isInterpreter: Boolean): Unit

Attributes
protected
Definition Classes
Logging
val inputAnnotatorTypes: Array[String]
Input annotator types : DOCUMENT, TOKEN, WORD_EMBEDDINGS
Input annotator types : DOCUMENT, TOKEN, WORD_EMBEDDINGS

Definition Classes
NerDLApproach → HasInputAnnotationCols
final val inputCols: StringArrayParam
Columns that contain annotations necessary to run this annotator. AnnotatorType is used both as input and output columns if not specified.
Columns that contain annotations necessary to run this annotator. AnnotatorType is used both as input and output columns if not specified.

Attributes
protected
Definition Classes
HasInputAnnotationCols
final def isDefined(param: Param[_]): Boolean

Definition Classes
Params
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
final def isSet(param: Param[_]): Boolean

Definition Classes
Params
def isTraceEnabled(): Boolean

Attributes
protected
Definition Classes
Logging
val labelColumn: Param[String]
Column with label per each token
Column with label per each token

Definition Classes
NerApproach
val lazyAnnotator: BooleanParam

Definition Classes
CanBeLazy
def log(value: ⇒ String, minLevel: Level): Unit

Attributes
protected
Definition Classes
Logging
def log: Logger

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logError(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logError(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logInfo(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logInfo(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logName: String

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logWarning(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logWarning(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
val logger: Logger

Attributes
protected
Definition Classes
Logging
val lr: FloatParam
Learning Rate
val maxEpochs: IntParam
Maximum number of epochs to train
Maximum number of epochs to train

Definition Classes
NerApproach
val minEpochs: IntParam
Minimum number of epochs to train
Minimum number of epochs to train

Definition Classes
NerApproach
def msgHelper(schema: StructType): String

Attributes
protected
Definition Classes
HasInputAnnotationCols
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
Annotations
@native()
final def notifyAll(): Unit

Definition Classes
AnyRef
Annotations
@native()
def onTrained(model: NerDLModel, spark: SparkSession): Unit

Definition Classes
AnnotatorApproach
def onWrite(path: String, spark: SparkSession): Unit

Attributes
protected
Definition Classes
ParamsAndFeaturesWritable
val outputAnnotatorType: String
Output annotator type : NAMED_ENTITY
Output annotator type : NAMED_ENTITY

Definition Classes
NerDLApproach → HasOutputAnnotatorType
final val outputCol: Param[String]

Attributes
protected
Definition Classes
HasOutputAnnotationCol
def outputLog(value: ⇒ String, uuid: String, shouldLog: Boolean, outputLogsPath: String): Unit

Attributes
protected
Definition Classes
Logging
val outputLogsPath: Param[String]
lazy val params: Array[Param[_]]

Definition Classes
Params
val po: FloatParam
Learning rate decay coefficient.
Learning rate decay coefficient. Real Learning Rate = lr / (1 + po * epoch)
val randomSeed: IntParam
Random seed
Random seed

Definition Classes
NerApproach
def save(path: String): Unit

Definition Classes
MLWritable
Annotations
@Since( "1.6.0" ) @throws( ... )
def set[T](feature: StructFeature[T], value: T): NerDLApproach.this.type

Attributes
protected
Definition Classes
HasFeatures
def set[K, V](feature: MapFeature[K, V], value: Map[K, V]): NerDLApproach.this.type

Attributes
protected
Definition Classes
HasFeatures
def set[T](feature: SetFeature[T], value: Set[T]): NerDLApproach.this.type

Attributes
protected
Definition Classes
HasFeatures
def set[T](feature: ArrayFeature[T], value: Array[T]): NerDLApproach.this.type

Attributes
protected
Definition Classes
HasFeatures
final def set(paramPair: ParamPair[_]): NerDLApproach.this.type

Attributes
protected
Definition Classes
Params
final def set(param: String, value: Any): NerDLApproach.this.type

Attributes
protected
Definition Classes
Params
final def set[T](param: Param[T], value: T): NerDLApproach.this.type

Definition Classes
Params
def setBatchSize(batch: Int): NerDLApproach.this.type
Batch size
def setConfigProtoBytes(bytes: Array[Int]): NerDLApproach.this.type
ConfigProto from tensorflow, serialized into byte array.
ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()
def setDefault[T](feature: StructFeature[T], value: () ⇒ T): NerDLApproach.this.type

Attributes
protected
Definition Classes
HasFeatures
def setDefault[K, V](feature: MapFeature[K, V], value: () ⇒ Map[K, V]): NerDLApproach.this.type

Attributes
protected
Definition Classes
HasFeatures
def setDefault[T](feature: SetFeature[T], value: () ⇒ Set[T]): NerDLApproach.this.type

Attributes
protected
Definition Classes
HasFeatures
def setDefault[T](feature: ArrayFeature[T], value: () ⇒ Array[T]): NerDLApproach.this.type

Attributes
protected
Definition Classes
HasFeatures
final def setDefault(paramPairs: ParamPair[_]*): NerDLApproach.this.type

Attributes
protected
Definition Classes
Params
final def setDefault[T](param: Param[T], value: T): NerDLApproach.this.type

Attributes
protected
Definition Classes
Params
def setDropout(dropout: Float): NerDLApproach.this.type
Dropout coefficient
def setEnableMemoryOptimizer(value: Boolean): NerDLApproach.this.type
def setEnableOutputLogs(enableOutputLogs: Boolean): NerDLApproach.this.type
Whether to output to annotators log folder
def setEntities(tags: Array[String]): NerDLApproach
Entities to recognize
Entities to recognize

Definition Classes
NerApproach
def setEvaluationLogExtended(evaluationLogExtended: Boolean): NerDLApproach.this.type
Whether to extend the validation logs: when enabled, it displays time and evaluation of each label.
Whether to extend the validation logs: when enabled, it displays time and evaluation of each label. Default is false.
def setGraphFolder(path: String): NerDLApproach.this.type
Folder path that contains external graph files
def setIncludeConfidence(value: Boolean): NerDLApproach.this.type
Whether to include confidence scores in annotation metadata
final def setInputCols(value: String*): NerDLApproach.this.type

Definition Classes
HasInputAnnotationCols
final def setInputCols(value: Array[String]): NerDLApproach.this.type
Overrides required annotators column if different than default
Overrides required annotators column if different than default

Definition Classes
HasInputAnnotationCols
def setLabelColumn(column: String): NerDLApproach
Column with label per each token
Column with label per each token

Definition Classes
NerApproach
def setLazyAnnotator(value: Boolean): NerDLApproach.this.type

Definition Classes
CanBeLazy
def setLr(lr: Float): NerDLApproach.this.type
Learning Rate
def setMaxEpochs(epochs: Int): NerDLApproach
Maximum number of epochs to train
Maximum number of epochs to train

Definition Classes
NerApproach
def setMinEpochs(epochs: Int): NerDLApproach
Minimum number of epochs to train
Minimum number of epochs to train

Definition Classes
NerApproach
final def setOutputCol(value: String): NerDLApproach.this.type
Overrides annotation column name when transforming
Overrides annotation column name when transforming

Definition Classes
HasOutputAnnotationCol
def setOutputLogsPath(path: String): NerDLApproach.this.type
def setPo(po: Float): NerDLApproach.this.type
Learning rate decay coefficient.
Learning rate decay coefficient. Real Learning Rate = lr / (1 + po * epoch)
def setRandomSeed(seed: Int): NerDLApproach
Random seed
Random seed

Definition Classes
NerApproach
def setTestDataset(er: ExternalResource): NerDLApproach.this.type
Path to test dataset.
Path to test dataset. If set, it is used to calculate statistics during training.
def setTestDataset(path: String, readAs: Format = ReadAs.SPARK, options: Map[String, String] = Map("format" -> "parquet")): NerDLApproach.this.type
Path to test dataset.
Path to test dataset. If set, it is used to calculate statistics during training.
def setUseContrib(value: Boolean): NerDLApproach.this.type
Whether to use contrib LSTM Cells.
Whether to use contrib LSTM Cells. Not compatible with Windows. Might slightly improve accuracy.
def setValidationSplit(validationSplit: Float): NerDLApproach.this.type
Choose the proportion of training dataset to be validated against the model on each Epoch.
Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off.
def setVerbose(verbose: Level): NerDLApproach
Level of verbosity during training
Level of verbosity during training

Definition Classes
NerApproach
def setVerbose(verbose: Int): NerDLApproach
Level of verbosity during training
Level of verbosity during training

Definition Classes
NerApproach
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
val testDataset: ExternalResourceParam
Path to test dataset.
Path to test dataset. If set, it is used to calculate statistics during training.
def toString(): String

Definition Classes
Identifiable → AnyRef → Any
def train(dataset: Dataset[_], recursivePipeline: Option[PipelineModel]): NerDLModel

Definition Classes
NerDLApproach → AnnotatorApproach
final def transformSchema(schema: StructType): StructType
requirement for pipeline transformation validation.
requirement for pipeline transformation validation. It is called on fit()

Definition Classes
AnnotatorApproach → PipelineStage
def transformSchema(schema: StructType, logging: Boolean): StructType

Attributes
protected
Definition Classes
PipelineStage
Annotations
@DeveloperApi()
val uid: String

Definition Classes
NerDLApproach → Identifiable
val useContrib: BooleanParam
Whether to use contrib LSTM Cells.
Whether to use contrib LSTM Cells. Not compatible with Windows. Might slightly improve accuracy.
def validate(schema: StructType): Boolean
takes a Dataset and checks to see if all the required annotation types are present.
takes a Dataset and checks to see if all the required annotation types are present.
schema
to be validated
returns
True if all the required types are present, else false

Attributes
protected
Definition Classes
AnnotatorApproach
val validationSplit: FloatParam
Choose the proportion of training dataset to be validated against the model on each Epoch.
Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off.
val verbose: IntParam
Level of verbosity during training
Level of verbosity during training

Definition Classes
NerApproach
val verboseLevel: Level

Definition Classes
NerDLApproach → Logging
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... ) @native()
def write: MLWriter

Definition Classes
ParamsAndFeaturesWritable → DefaultParamsWritable → MLWritable

Packages

NerDLApproach

Companion object NerDLApproach

class NerDLApproach extends AnnotatorApproach[NerDLModel] with NerApproach[NerDLApproach] with Logging with ParamsAndFeaturesWritable

Instance Constructors

Type Members

Value Members

Inherited from ParamsAndFeaturesWritable

Inherited from HasFeatures

Inherited from Logging

Inherited from NerApproach[NerDLApproach]

Inherited from AnnotatorApproach[NerDLModel]

Inherited from CanBeLazy

Inherited from DefaultParamsWritable

Inherited from MLWritable

Inherited from HasOutputAnnotatorType

Inherited from HasOutputAnnotationCol

Inherited from HasInputAnnotationCols

Inherited from Estimator[NerDLModel]

Inherited from PipelineStage

Inherited from Logging

Inherited from Params

Inherited from Serializable

Inherited from Serializable

Inherited from Identifiable

Inherited from AnyRef

Inherited from Any

Parameters

Annotator types

Members

Parameter setters

Parameter getters

Packages

NerDLApproach 

Companion object NerDLApproach

class NerDLApproach extends AnnotatorApproach[NerDLModel] with NerApproach[NerDLApproach] with Logging with ParamsAndFeaturesWritable

Instance Constructors

Type Members

Value Members

Inherited from ParamsAndFeaturesWritable

Inherited from HasFeatures

Inherited from Logging

Inherited from NerApproach[NerDLApproach]

Inherited from AnnotatorApproach[NerDLModel]

Inherited from CanBeLazy

Inherited from DefaultParamsWritable

Inherited from MLWritable

Inherited from HasOutputAnnotatorType

Inherited from HasOutputAnnotationCol

Inherited from HasInputAnnotationCols

Inherited from Estimator[NerDLModel]

Inherited from PipelineStage

Inherited from Logging

Inherited from Params

Inherited from Serializable

Inherited from Serializable

Inherited from Identifiable

Inherited from AnyRef

Inherited from Any

Parameters

Annotator types

Members

Parameter setters

Parameter getters

NerDLApproach