lamp.data.bert

Type members

Classlikes

case class BertData(maskedTokens: STen, segments: STen, predictionPositions: STen, maskedLanguageModelTarget: STen, nextSentenceTarget: STen)

Value members

Concrete methods

def makeMaskForMaskedLanguageModel(bertTokens: Array[Int], maximumTokenId: Int, clsToken: Int, sepToken: Int, maskToken: Int, rng: Random): (Array[Int], Array[Int], Array[Int])
def minibatchesFromFull(minibatchSize: Int, dropLast: Boolean, fullData: BertData, rng: Random): BatchStream[BertLossInput, Int, Unit]
def pad(v: Array[Int], paddedLength: Int, padElem: Int): Array[Int]
def prepareFullDatasetFromTokenizedParagraphs[S : Sc](paragraphs: Vector[Vector[Array[Int]]], maximumTokenId: Int, clsToken: Int, sepToken: Int, padToken: Int, maskToken: Int, maxLength: Int, rng: Random): BertData
def prepareParagraph[S : Sc](paragraph: Vector[Array[Int]], maximumTokenId: Int, clsToken: Int, sepToken: Int, padToken: Int, maskToken: Int, maxLength: Int, rng: Random): Vector[(Boolean, STen, STen, STen, STen)]