class BertTokenizer extends BasicTokenizer

Linear Supertypes
BasicTokenizer, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. BertTokenizer
  2. BasicTokenizer
  3. AnyRef
  4. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. Protected

Instance Constructors

  1. new BertTokenizer(vocab: Map[String, Int], specialTokens: SpecialTokens)

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##: Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  5. val bytesToUnicodeMapping: Map[Int, String]

    Mapping of bytes to a different set of Unicode characters (especially for whitespace).

    Mapping of bytes to a different set of Unicode characters (especially for whitespace). This improved model performance for GPT-2.

    Attributes
    protected
  6. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.CloneNotSupportedException]) @HotSpotIntrinsicCandidate() @native()
  7. def decodeTokens(tokens: Array[Int]): String
  8. def encode(sentences: Seq[(WordpieceTokenizedSentence, Int)], maxSequenceLength: Int): Seq[Array[Int]]

    Encode the input sequences as index IDs, adding padding where necessary.

  9. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  10. def equals(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef → Any
  11. final def getClass(): Class[_ <: AnyRef]
    Definition Classes
    AnyRef → Any
    Annotations
    @HotSpotIntrinsicCandidate() @native()
  12. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @HotSpotIntrinsicCandidate() @native()
  13. def isChinese(char: Char): Boolean
    Definition Classes
    BasicTokenizer
  14. def isControl(char: Char): Boolean
    Definition Classes
    BasicTokenizer
  15. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  16. def isPunctuation(char: Char): Boolean
    Definition Classes
    BasicTokenizer
  17. def isToFilter(char: Char): Boolean
    Definition Classes
    BasicTokenizer
  18. def isWhitespace(char: Char): Boolean
    Definition Classes
    BasicTokenizer
  19. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  20. def normalize(text: String): String
    Definition Classes
    BasicTokenizer
  21. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @HotSpotIntrinsicCandidate() @native()
  22. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @HotSpotIntrinsicCandidate() @native()
  23. val specialTokens: SpecialTokens
  24. def stripAccents(text: String): String
    Definition Classes
    BasicTokenizer
  25. final def synchronized[T0](arg0: => T0): T0
    Definition Classes
    AnyRef
  26. def toString(): String
    Definition Classes
    AnyRef → Any
  27. def tokenize(sentence: Sentence): Array[IndexedToken]

    sentence

    Input Sentence, which can be either a full sentence or just a single token of type Sentence.

    Definition Classes
    BasicTokenizer
  28. val vocab: Map[String, Int]
  29. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.InterruptedException])
  30. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.InterruptedException]) @native()
  31. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.InterruptedException])

Deprecated Value Members

  1. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.Throwable]) @Deprecated
    Deprecated

    (Since version 9)

Inherited from BasicTokenizer

Inherited from AnyRef

Inherited from Any

Ungrouped