NerDLApproach

Instance Constructors

new NerDLApproach()
new NerDLApproach(uid: String)

Type Members

type AnnotatorType = String

Definition Classes
HasOutputAnnotatorType

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def $[T](param: Param[T]): T

Attributes
protected
Definition Classes
Params
def $$[T](feature: StructFeature[T]): T

Attributes
protected
Definition Classes
HasFeatures
def $$[K, V](feature: MapFeature[K, V]): Map[K, V]

Attributes
protected
Definition Classes
HasFeatures
def $$[T](feature: SetFeature[T]): Set[T]

Attributes
protected
Definition Classes
HasFeatures
def $$[T](feature: ArrayFeature[T]): Array[T]

Attributes
protected
Definition Classes
HasFeatures
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def _fit(dataset: Dataset[_], recursiveStages: Option[PipelineModel]): NerDLModel

Attributes
protected
Definition Classes
AnnotatorApproach
final def asInstanceOf[T0]: T0

Definition Classes
Any
val batchSize: IntParam

Batch size
def beforeTraining(spark: SparkSession): Unit

Definition Classes
NerDLApproach → AnnotatorApproach
def calculateEmbeddingsDim(sentences: Seq[WordpieceEmbeddingsSentence]): Int
final def checkSchema(schema: StructType, inputAnnotatorType: String): Boolean

Attributes
protected
Definition Classes
HasInputAnnotationCols
final def clear(param: Param[_]): NerDLApproach.this.type

Definition Classes
Params
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
val configProtoBytes: IntArrayParam

ConfigProto from tensorflow, serialized into byte array.
ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()
final def copy(extra: ParamMap): Estimator[NerDLModel]

Definition Classes
AnnotatorApproach → Estimator → PipelineStage → Params
def copyValues[T <: Params](to: T, extra: ParamMap): T

Attributes
protected
Definition Classes
Params
final def defaultCopy[T <: Params](extra: ParamMap): T

Attributes
protected
Definition Classes
Params
val description: String

Trains Tensorflow based Char-CNN-BLSTM model
Trains Tensorflow based Char-CNN-BLSTM model

Definition Classes
NerDLApproach → AnnotatorApproach
val dropout: FloatParam

"Dropout coefficient
val enableMemoryOptimizer: BooleanParam
val enableOutputLogs: BooleanParam

Whether to output to annotators log folder
val entities: StringArrayParam

Entities to recognize
Entities to recognize

Definition Classes
NerApproach
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
val evaluationLogExtended: BooleanParam

Whether logs for validation to be extended: it displays time and evaluation of each label.
Whether logs for validation to be extended: it displays time and evaluation of each label. Default is false.
def explainParam(param: Param[_]): String

Definition Classes
Params
def explainParams(): String

Definition Classes
Params
final def extractParamMap(): ParamMap

Definition Classes
Params
final def extractParamMap(extra: ParamMap): ParamMap

Definition Classes
Params
val features: ArrayBuffer[Feature[_, _, _]]

Definition Classes
HasFeatures
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def fit(dataset: Dataset[_]): NerDLModel

Definition Classes
AnnotatorApproach → Estimator
def fit(dataset: Dataset[_], paramMaps: Array[ParamMap]): Seq[NerDLModel]

Definition Classes
Estimator
Annotations
@Since( "2.0.0" )
def fit(dataset: Dataset[_], paramMap: ParamMap): NerDLModel

Definition Classes
Estimator
Annotations
@Since( "2.0.0" )
def fit(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): NerDLModel

Definition Classes
Estimator
Annotations
@Since( "2.0.0" ) @varargs()
def get[T](feature: StructFeature[T]): Option[T]

Attributes
protected
Definition Classes
HasFeatures
def get[K, V](feature: MapFeature[K, V]): Option[Map[K, V]]

Attributes
protected
Definition Classes
HasFeatures
def get[T](feature: SetFeature[T]): Option[Set[T]]

Attributes
protected
Definition Classes
HasFeatures
def get[T](feature: ArrayFeature[T]): Option[Array[T]]

Attributes
protected
Definition Classes
HasFeatures
final def get[T](param: Param[T]): Option[T]

Definition Classes
Params
def getBatchSize: Int

Batch size
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def getConfigProtoBytes: Option[Array[Byte]]

ConfigProto from tensorflow, serialized into byte array.
ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()
def getDataSetParams(dsIt: Iterator[Array[(TextSentenceLabels, WordpieceEmbeddingsSentence)]]): (Set[String], Set[Char], Int, Long)
final def getDefault[T](param: Param[T]): Option[T]

Definition Classes
Params
def getDropout: Float

Dropout coefficient
def getEnableMemoryOptimizer: Boolean

Memory Optimizer
def getEnableOutputLogs: Boolean

Whether to output to annotators log folder
def getIncludeConfidence: Boolean

whether to include confidence scores in annotation metadata
def getInputCols: Array[String]

returns
input annotations columns currently used

Definition Classes
HasInputAnnotationCols
def getIteratorFunc(dataset: Dataset[Row]): () ⇒ Iterator[Array[(TextSentenceLabels, WordpieceEmbeddingsSentence)]]
def getLazyAnnotator: Boolean

Definition Classes
CanBeLazy
def getLogName: String

Definition Classes
NerDLApproach → Logging
def getLr: Float

Learning Rate
def getMaxEpochs: Int

Maximum number of epochs to train
Maximum number of epochs to train

Definition Classes
NerApproach
def getMinEpochs: Int

Minimum number of epochs to train
Minimum number of epochs to train

Definition Classes
NerApproach
final def getOrDefault[T](param: Param[T]): T

Definition Classes
Params
final def getOutputCol: String

Gets annotation column name going to generate
Gets annotation column name going to generate

Definition Classes
HasOutputAnnotationCol
def getOutputLogsPath: String
def getParam(paramName: String): Param[Any]

Definition Classes
Params
def getPo: Float

Learning rate decay coefficient.
Learning rate decay coefficient. Real Learning Rage = lr / (1 + po * epoch)
def getRandomSeed: Int

Random seed
Random seed

Definition Classes
NerApproach
def getUseContrib: Boolean

Whether to use contrib LSTM Cells.
Whether to use contrib LSTM Cells. Not compatible with Windows. Might slightly improve accuracy.
def getValidationSplit: Float

Choose the proportion of training dataset to be validated against the model on each Epoch.
Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off.
def getVerbose: Int

Level of verbosity during training
Level of verbosity during training

Definition Classes
NerApproach
val graphFolder: Param[String]

Folder path that contain external graph files
final def hasDefault[T](param: Param[T]): Boolean

Definition Classes
Params
def hasParam(paramName: String): Boolean

Definition Classes
Params
def hashCode(): Int

Definition Classes
AnyRef → Any
val includeConfidence: BooleanParam

val includeConfidence = new BooleanParam(this, "includeConfidence", "Whether to include confidence scores in annotation metadata")
def initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean

Attributes
protected
Definition Classes
Logging
def initializeLogIfNecessary(isInterpreter: Boolean): Unit

Attributes
protected
Definition Classes
Logging
val inputAnnotatorTypes: Array[String]

Input annotator types : DOCUMENT, TOKEN, WORD_EMBEDDINGS
Input annotator types : DOCUMENT, TOKEN, WORD_EMBEDDINGS

Definition Classes
NerDLApproach → HasInputAnnotationCols
final val inputCols: StringArrayParam

columns that contain annotations necessary to run this annotator AnnotatorType is used both as input and output columns if not specified
columns that contain annotations necessary to run this annotator AnnotatorType is used both as input and output columns if not specified

Attributes
protected
Definition Classes
HasInputAnnotationCols
final def isDefined(param: Param[_]): Boolean

Definition Classes
Params
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
final def isSet(param: Param[_]): Boolean

Definition Classes
Params
def isTraceEnabled(): Boolean

Attributes
protected
Definition Classes
Logging
val labelColumn: Param[String]

Column with label per each token
Column with label per each token

Definition Classes
NerApproach
val lazyAnnotator: BooleanParam

Definition Classes
CanBeLazy
def log(value: ⇒ String, minLevel: Level): Unit

Attributes
protected
Definition Classes
Logging
def log: Logger

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logError(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logError(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logInfo(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logInfo(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logName: String

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logWarning(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logWarning(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
val logger: Logger

Attributes
protected
Definition Classes
Logging
val lr: FloatParam

Learning Rate
val maxEpochs: IntParam

Maximum number of epochs to train
Maximum number of epochs to train

Definition Classes
NerApproach
val minEpochs: IntParam

Minimum number of epochs to train
Minimum number of epochs to train

Definition Classes
NerApproach
def msgHelper(schema: StructType): String

Attributes
protected
Definition Classes
HasInputAnnotationCols
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def onTrained(model: NerDLModel, spark: SparkSession): Unit

Definition Classes
AnnotatorApproach
def onWrite(path: String, spark: SparkSession): Unit

Attributes
protected
Definition Classes
ParamsAndFeaturesWritable
val outputAnnotatorType: String

Input annotator types : NAMED_ENTITY
Input annotator types : NAMED_ENTITY

Definition Classes
NerDLApproach → HasOutputAnnotatorType
final val outputCol: Param[String]

Attributes
protected
Definition Classes
HasOutputAnnotationCol
def outputLog(value: ⇒ String, uuid: String, shouldLog: Boolean, outputLogsPath: String): Unit

Attributes
protected
Definition Classes
Logging
val outputLogsPath: Param[String]
lazy val params: Array[Param[_]]

Definition Classes
Params
val po: FloatParam

Learning rate decay coefficient.
Learning rate decay coefficient. Real Learning Rage = lr / (1 + po * epoch)
val randomSeed: IntParam

Random seed
Random seed

Definition Classes
NerApproach
def save(path: String): Unit

Definition Classes
MLWritable
Annotations
@Since( "1.6.0" ) @throws( ... )
def set[T](feature: StructFeature[T], value: T): NerDLApproach.this.type

Attributes
protected
Definition Classes
HasFeatures
def set[K, V](feature: MapFeature[K, V], value: Map[K, V]): NerDLApproach.this.type

Attributes
protected
Definition Classes
HasFeatures
def set[T](feature: SetFeature[T], value: Set[T]): NerDLApproach.this.type

Attributes
protected
Definition Classes
HasFeatures
def set[T](feature: ArrayFeature[T], value: Array[T]): NerDLApproach.this.type

Attributes
protected
Definition Classes
HasFeatures
final def set(paramPair: ParamPair[_]): NerDLApproach.this.type

Attributes
protected
Definition Classes
Params
final def set(param: String, value: Any): NerDLApproach.this.type

Attributes
protected
Definition Classes
Params
final def set[T](param: Param[T], value: T): NerDLApproach.this.type

Definition Classes
Params
def setBatchSize(batch: Int): NerDLApproach.this.type

Batch size
def setConfigProtoBytes(bytes: Array[Int]): NerDLApproach.this.type

ConfigProto from tensorflow, serialized into byte array.
ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()
def setDefault[T](feature: StructFeature[T], value: () ⇒ T): NerDLApproach.this.type

Attributes
protected
Definition Classes
HasFeatures
def setDefault[K, V](feature: MapFeature[K, V], value: () ⇒ Map[K, V]): NerDLApproach.this.type

Attributes
protected
Definition Classes
HasFeatures
def setDefault[T](feature: SetFeature[T], value: () ⇒ Set[T]): NerDLApproach.this.type

Attributes
protected
Definition Classes
HasFeatures
def setDefault[T](feature: ArrayFeature[T], value: () ⇒ Array[T]): NerDLApproach.this.type

Attributes
protected
Definition Classes
HasFeatures
final def setDefault(paramPairs: ParamPair[_]*): NerDLApproach.this.type

Attributes
protected
Definition Classes
Params
final def setDefault[T](param: Param[T], value: T): NerDLApproach.this.type

Attributes
protected
Definition Classes
Params
def setDropout(dropout: Float): NerDLApproach.this.type

Dropout coefficient
def setEnableMemoryOptimizer(value: Boolean): NerDLApproach.this.type
def setEnableOutputLogs(enableOutputLogs: Boolean): NerDLApproach.this.type

Whether to output to annotators log folder
def setEntities(tags: Array[String]): NerDLApproach

Entities to recognize
Entities to recognize

Definition Classes
NerApproach
def setEvaluationLogExtended(evaluationLogExtended: Boolean): NerDLApproach.this.type

Whether logs for validation to be extended: it displays time and evaluation of each label.
Whether logs for validation to be extended: it displays time and evaluation of each label. Default is false.
def setGraphFolder(path: String): NerDLApproach.this.type

Folder path that contain external graph files
def setIncludeConfidence(value: Boolean): NerDLApproach.this.type

Whether to include confidence scores in annotation metadata
final def setInputCols(value: String*): NerDLApproach.this.type

Definition Classes
HasInputAnnotationCols
final def setInputCols(value: Array[String]): NerDLApproach.this.type

Overrides required annotators column if different than default
Overrides required annotators column if different than default

Definition Classes
HasInputAnnotationCols
def setLabelColumn(column: String): NerDLApproach

Column with label per each token
Column with label per each token

Definition Classes
NerApproach
def setLazyAnnotator(value: Boolean): NerDLApproach.this.type

Definition Classes
CanBeLazy
def setLr(lr: Float): NerDLApproach.this.type

Learning Rate
def setMaxEpochs(epochs: Int): NerDLApproach

Maximum number of epochs to train
Maximum number of epochs to train

Definition Classes
NerApproach
def setMinEpochs(epochs: Int): NerDLApproach

Minimum number of epochs to train
Minimum number of epochs to train

Definition Classes
NerApproach
final def setOutputCol(value: String): NerDLApproach.this.type

Overrides annotation column name when transforming
Overrides annotation column name when transforming

Definition Classes
HasOutputAnnotationCol
def setOutputLogsPath(path: String): NerDLApproach.this.type
def setPo(po: Float): NerDLApproach.this.type

Learning rate decay coefficient.
Learning rate decay coefficient. Real Learning Rage = lr / (1 + po * epoch)
def setRandomSeed(seed: Int): NerDLApproach

Random seed
Random seed

Definition Classes
NerApproach
def setTestDataset(er: ExternalResource): NerDLApproach.this.type

Path to test dataset.
Path to test dataset. If set used to calculate statistic on it during training.
def setTestDataset(path: String, readAs: Format = ReadAs.SPARK, options: Map[String, String] = Map("format" -> "parquet")): NerDLApproach.this.type

Path to test dataset.
Path to test dataset. If set used to calculate statistic on it during training.
def setUseContrib(value: Boolean): NerDLApproach.this.type

Whether to use contrib LSTM Cells.
Whether to use contrib LSTM Cells. Not compatible with Windows. Might slightly improve accuracy.
def setValidationSplit(validationSplit: Float): NerDLApproach.this.type

Choose the proportion of training dataset to be validated against the model on each Epoch.
Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off.
def setVerbose(verbose: Level): NerDLApproach

Level of verbosity during training
Level of verbosity during training

Definition Classes
NerApproach
def setVerbose(verbose: Int): NerDLApproach

Level of verbosity during training
Level of verbosity during training

Definition Classes
NerApproach
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
val testDataset: ExternalResourceParam

val testDataset = new ExternalResourceParam(this, "testDataset", "Path to test dataset.
val testDataset = new ExternalResourceParam(this, "testDataset", "Path to test dataset. If set used to calculate statistic on it during training.")
def toString(): String

Definition Classes
Identifiable → AnyRef → Any
def train(dataset: Dataset[_], recursivePipeline: Option[PipelineModel]): NerDLModel

Definition Classes
NerDLApproach → AnnotatorApproach
final def transformSchema(schema: StructType): StructType

requirement for pipeline transformation validation.
requirement for pipeline transformation validation. It is called on fit()

Definition Classes
AnnotatorApproach → PipelineStage
def transformSchema(schema: StructType, logging: Boolean): StructType

Attributes
protected
Definition Classes
PipelineStage
Annotations
@DeveloperApi()
val uid: String

Definition Classes
NerDLApproach → Identifiable
val useContrib: BooleanParam

whether to use contrib LSTM Cells.
whether to use contrib LSTM Cells. Not compatible with Windows. Might slightly improve accuracy.
def validate(schema: StructType): Boolean

takes a Dataset and checks to see if all the required annotation types are present.
takes a Dataset and checks to see if all the required annotation types are present.
schema
to be validated
returns
True if all the required types are present, else false

Attributes
protected
Definition Classes
AnnotatorApproach
val validationSplit: FloatParam

Choose the proportion of training dataset to be validated against the model on each Epoch.
Choose the proportion of training dataset to be validated against the model on each Epoch. The value should be between 0.0 and 1.0 and by default it is 0.0 and off.
val verbose: IntParam

Level of verbosity during training
Level of verbosity during training

Definition Classes
NerApproach
val verboseLevel: Level

Definition Classes
NerDLApproach → Logging
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
def write: MLWriter

Definition Classes
ParamsAndFeaturesWritable → DefaultParamsWritable → MLWritable

Related Docs: object NerDLApproach | package dl

class NerDLApproach extends AnnotatorApproach[NerDLModel] with NerApproach[NerDLApproach] with Logging with ParamsAndFeaturesWritable

Instance Constructors

new NerDLApproach()

new NerDLApproach(uid: String)

Type Members

type AnnotatorType = String

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def $[T](param: Param[T]): T

def $$[T](feature: StructFeature[T]): T

def $$[K, V](feature: MapFeature[K, V]): Map[K, V]

def $$[T](feature: SetFeature[T]): Set[T]

def $$[T](feature: ArrayFeature[T]): Array[T]

final def ==(arg0: Any): Boolean

def _fit(dataset: Dataset[_], recursiveStages: Option[PipelineModel]): NerDLModel

final def asInstanceOf[T0]: T0

val batchSize: IntParam

def beforeTraining(spark: SparkSession): Unit

def calculateEmbeddingsDim(sentences: Seq[WordpieceEmbeddingsSentence]): Int

final def checkSchema(schema: StructType, inputAnnotatorType: String): Boolean

final def clear(param: Param[_]): NerDLApproach.this.type

def clone(): AnyRef

val configProtoBytes: IntArrayParam

final def copy(extra: ParamMap): Estimator[NerDLModel]

def copyValues[T <: Params](to: T, extra: ParamMap): T

final def defaultCopy[T <: Params](extra: ParamMap): T

val description: String

val dropout: FloatParam

val enableMemoryOptimizer: BooleanParam

val enableOutputLogs: BooleanParam

val entities: StringArrayParam

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

val evaluationLogExtended: BooleanParam

def explainParam(param: Param[_]): String

def explainParams(): String

final def extractParamMap(): ParamMap

final def extractParamMap(extra: ParamMap): ParamMap

val features: ArrayBuffer[Feature[_, _, _]]

def finalize(): Unit

final def fit(dataset: Dataset[_]): NerDLModel

def fit(dataset: Dataset[_], paramMaps: Array[ParamMap]): Seq[NerDLModel]

def fit(dataset: Dataset[_], paramMap: ParamMap): NerDLModel

def fit(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): NerDLModel

def get[T](feature: StructFeature[T]): Option[T]

def get[K, V](feature: MapFeature[K, V]): Option[Map[K, V]]

def get[T](feature: SetFeature[T]): Option[Set[T]]

def get[T](feature: ArrayFeature[T]): Option[Array[T]]

final def get[T](param: Param[T]): Option[T]

def getBatchSize: Int

final def getClass(): Class[_]

def getConfigProtoBytes: Option[Array[Byte]]

def getDataSetParams(dsIt: Iterator[Array[(TextSentenceLabels, WordpieceEmbeddingsSentence)]]): (Set[String], Set[Char], Int, Long)

final def getDefault[T](param: Param[T]): Option[T]

def getDropout: Float

def getEnableMemoryOptimizer: Boolean

def getEnableOutputLogs: Boolean

def getIncludeConfidence: Boolean

def getInputCols: Array[String]

def getIteratorFunc(dataset: Dataset[Row]): () ⇒ Iterator[Array[(TextSentenceLabels, WordpieceEmbeddingsSentence)]]

def getLazyAnnotator: Boolean

def getLogName: String

def getLr: Float

def getMaxEpochs: Int

def getMinEpochs: Int

final def getOrDefault[T](param: Param[T]): T

final def getOutputCol: String

def getOutputLogsPath: String

def getParam(paramName: String): Param[Any]

def getPo: Float

def getRandomSeed: Int

def getUseContrib: Boolean

def getValidationSplit: Float

def getVerbose: Int

val graphFolder: Param[String]

final def hasDefault[T](param: Param[T]): Boolean

def hasParam(paramName: String): Boolean