Packages

trait

com.johnsnowlabs.nlp

HasLlamaCppModelProperties

trait HasLlamaCppModelProperties extends AnyRef

Contains settable model parameters for the AutoGGUFModel.

Self Type
HasLlamaCppModelProperties with ParamsAndFeaturesWritable with HasProtectedParams
Linear Supertypes
AnyRef, Any
Ordering
  1. Grouped
  2. Alphabetic
  3. By Inheritance
Inherited
  1. HasLlamaCppModelProperties
  2. AnyRef
  3. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. Protected

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##: Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  5. val chatTemplate: Param[String]

  6. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.CloneNotSupportedException]) @HotSpotIntrinsicCandidate() @native()
  7. val defragmentationThreshold: FloatParam

  8. val disableLog: BooleanParam

  9. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  10. def equals(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef → Any
  11. val flashAttention: BooleanParam

  12. def getChatTemplate: String

  13. final def getClass(): Class[_ <: AnyRef]
    Definition Classes
    AnyRef → Any
    Annotations
    @HotSpotIntrinsicCandidate() @native()
  14. def getDefragmentationThreshold: Float

  15. def getDisableLog: Boolean

  16. def getFlashAttention: Boolean

  17. def getLogVerbosity: Int
  18. def getMainGpu: Int

  19. def getMetadata: String

    Get the metadata for the model

  20. def getMetadataMap: Map[String, Map[String, String]]
  21. def getModelDraft: String

  22. def getModelParameters: ModelParameters
    Attributes
    protected
  23. def getNBatch: Int

  24. def getNCtx: Int

  25. def getNDraft: Int

  26. def getNGpuLayers: Int

  27. def getNGpuLayersDraft: Int

  28. def getNThreads: Int

  29. def getNThreadsBatch: Int

  30. def getNUbatch: Int

  31. def getNoKvOffload: Boolean

  32. def getNuma: String

  33. def getRopeFreqBase: Float

  34. def getRopeFreqScale: Float

  35. def getRopeScalingType: String

  36. def getSplitMode: String

  37. def getSystemPrompt: String

  38. def getUseMlock: Boolean

  39. def getUseMmap: Boolean

  40. def getYarnAttnFactor: Float

  41. def getYarnBetaFast: Float

  42. def getYarnBetaSlow: Float

  43. def getYarnExtFactor: Float

  44. def getYarnOrigCtx: Int

  45. val gpuSplitMode: Param[String]

    Set how to split the model across GPUs

    Set how to split the model across GPUs

    • NONE: No GPU split
    • LAYER: Split the model across GPUs by layer
    • ROW: Split the model across GPUs by rows
  46. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @HotSpotIntrinsicCandidate() @native()
  47. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  48. val logVerbosity: IntParam

  49. val logger: Logger
    Attributes
    protected
  50. val mainGpu: IntParam

  51. val metadata: (HasLlamaCppModelProperties.this)#ProtectedParam[String]
  52. val modelDraft: Param[String]

  53. val nBatch: IntParam

  54. val nCtx: IntParam

  55. val nDraft: IntParam

  56. val nGpuLayers: IntParam

  57. val nGpuLayersDraft: IntParam

  58. val nThreads: IntParam

  59. val nThreadsBatch: IntParam

  60. val nUbatch: IntParam

  61. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  62. val noKvOffload: BooleanParam

  63. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @HotSpotIntrinsicCandidate() @native()
  64. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @HotSpotIntrinsicCandidate() @native()
  65. val numaStrategy: Param[String]

    Set optimization strategies that help on some NUMA systems (if available)

    Set optimization strategies that help on some NUMA systems (if available)

    Available Strategies:

    • DISABLED: No NUMA optimizations
    • DISTRIBUTE: Spread execution evenly over all nodes
    • ISOLATE: Only spawn threads on CPUs on the node that execution started on
    • NUMA_CTL: Use the CPU map provided by numactl
    • MIRROR: Mirrors the model across NUMA nodes
  66. val ropeFreqBase: FloatParam

  67. val ropeFreqScale: FloatParam

  68. val ropeScalingType: Param[String]

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    • UNSPECIFIED: Don't use any scaling
    • LINEAR: Linear scaling
    • YARN: YaRN RoPE scaling
  69. def setChatTemplate(chatTemplate: String): HasLlamaCppModelProperties.this

    The chat template to use

  70. def setDefragmentationThreshold(defragThold: Float): HasLlamaCppModelProperties.this

    Set the KV cache defragmentation threshold

  71. def setDisableLog(disableLog: Boolean): HasLlamaCppModelProperties.this

  72. def setFlashAttention(flashAttention: Boolean): HasLlamaCppModelProperties.this

    Whether to enable Flash Attention

  73. def setGpuSplitMode(splitMode: String): HasLlamaCppModelProperties.this

    Set how to split the model across GPUs

    Set how to split the model across GPUs

    • NONE: No GPU split
    • LAYER: Split the model across GPUs by layer
    • ROW: Split the model across GPUs by rows
  74. def setLogVerbosity(logVerbosity: Int): HasLlamaCppModelProperties.this

    Set the verbosity threshold.

    Set the verbosity threshold. Messages with a higher verbosity will be ignored.

    Values map to the following:

    • GGML_LOG_LEVEL_NONE = 0
    • GGML_LOG_LEVEL_DEBUG = 1
    • GGML_LOG_LEVEL_INFO = 2
    • GGML_LOG_LEVEL_WARN = 3
    • GGML_LOG_LEVEL_ERROR = 4
    • GGML_LOG_LEVEL_CONT = 5 (continue previous log)
  75. def setMainGpu(mainGpu: Int): HasLlamaCppModelProperties.this

    Set the GPU that is used for scratch and small tensors

  76. def setMetadata(metadata: String): HasLlamaCppModelProperties.this

    Set the metadata for the model

  77. def setModelDraft(modelDraft: String): HasLlamaCppModelProperties.this

    Set the draft model for speculative decoding

  78. def setNBatch(nBatch: Int): HasLlamaCppModelProperties.this

    Set the logical batch size for prompt processing (must be >=32 to use BLAS)

  79. def setNCtx(nCtx: Int): HasLlamaCppModelProperties.this

    Set the size of the prompt context

  80. def setNDraft(nDraft: Int): HasLlamaCppModelProperties.this

    Set the number of tokens to draft for speculative decoding

  81. def setNGpuLayers(nGpuLayers: Int): HasLlamaCppModelProperties.this

    Set the number of layers to store in VRAM (-1 - use default)

  82. def setNGpuLayersDraft(nGpuLayersDraft: Int): HasLlamaCppModelProperties.this

    Set the number of layers to store in VRAM for the draft model (-1 - use default)

  83. def setNThreads(nThreads: Int): HasLlamaCppModelProperties.this

    Set the number of threads to use during generation

  84. def setNThreadsBatch(nThreadsBatch: Int): HasLlamaCppModelProperties.this

    Set the number of threads to use during batch and prompt processing

  85. def setNUbatch(nUbatch: Int): HasLlamaCppModelProperties.this

    Set the physical batch size for prompt processing (must be >=32 to use BLAS)

  86. def setNoKvOffload(noKvOffload: Boolean): HasLlamaCppModelProperties.this

    Whether to disable KV offload

  87. def setNumaStrategy(numa: String): HasLlamaCppModelProperties.this

    Set optimization strategies that help on some NUMA systems (if available)

    Set optimization strategies that help on some NUMA systems (if available)

    Available Strategies:

    • DISABLED: No NUMA optimizations
    • DISTRIBUTE: spread execution evenly over all nodes
    • ISOLATE: only spawn threads on CPUs on the node that execution started on
    • NUMA_CTL: use the CPU map provided by numactl
    • MIRROR: Mirrors the model across NUMA nodes
  88. def setRopeFreqBase(ropeFreqBase: Float): HasLlamaCppModelProperties.this

    Set the RoPE base frequency, used by NTK-aware scaling

  89. def setRopeFreqScale(ropeFreqScale: Float): HasLlamaCppModelProperties.this

    Set the RoPE frequency scaling factor, expands context by a factor of 1/N

  90. def setRopeScalingType(ropeScalingType: String): HasLlamaCppModelProperties.this

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    Set the RoPE frequency scaling method, defaults to linear unless specified by the model.

    • UNSPECIFIED: Don't use any scaling
    • LINEAR: Linear scaling
    • YARN: YaRN RoPE scaling
  91. def setSystemPrompt(systemPrompt: String): HasLlamaCppModelProperties.this

    Set a system prompt to use

  92. def setUseMlock(useMlock: Boolean): HasLlamaCppModelProperties.this

    Whether to force the system to keep model in RAM rather than swapping or compressing

  93. def setUseMmap(useMmap: Boolean): HasLlamaCppModelProperties.this

    Whether to use memory-map model (faster load but may increase pageouts if not using mlock)

  94. def setYarnAttnFactor(yarnAttnFactor: Float): HasLlamaCppModelProperties.this

    Set the YaRN scale sqrt(t) or attention magnitude

  95. def setYarnBetaFast(yarnBetaFast: Float): HasLlamaCppModelProperties.this

    Set the YaRN low correction dim or beta

  96. def setYarnBetaSlow(yarnBetaSlow: Float): HasLlamaCppModelProperties.this

    Set the YaRN high correction dim or alpha

  97. def setYarnExtFactor(yarnExtFactor: Float): HasLlamaCppModelProperties.this

    Set the YaRN extrapolation mix factor

  98. def setYarnOrigCtx(yarnOrigCtx: Int): HasLlamaCppModelProperties.this

    Set the YaRN original context size of model

  99. final def synchronized[T0](arg0: => T0): T0
    Definition Classes
    AnyRef
  100. val systemPrompt: Param[String]

  101. def toString(): String
    Definition Classes
    AnyRef → Any
  102. val useMlock: BooleanParam

  103. val useMmap: BooleanParam

  104. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.InterruptedException])
  105. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.InterruptedException]) @native()
  106. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.InterruptedException])
  107. val yarnAttnFactor: FloatParam

  108. val yarnBetaFast: FloatParam

  109. val yarnBetaSlow: FloatParam

  110. val yarnExtFactor: FloatParam

  111. val yarnOrigCtx: IntParam

Deprecated Value Members

  1. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws(classOf[java.lang.Throwable]) @Deprecated
    Deprecated

    (Since version 9)

Inherited from AnyRef

Inherited from Any

Parameter setters

Parameter getters

Parameters

A list of (hyper-)parameter keys this annotator can take. Users can set and get the parameter values through setters and getters, respectively.

Ungrouped