com.sksamuel.elastic4s.analysis

Type members

Classlikes

case class Analysis(analyzers: List[Analyzer], tokenizers: List[Tokenizer], tokenFilters: List[TokenFilter], charFilters: List[CharFilter], normalizers: List[Normalizer])
Companion:
object
object Analysis
Companion:
class
trait Analyzer
trait Builder[T]
case class CharGroupTokenizer(name: String, tokenizeOnChars: List[String]) extends Tokenizer
case class ClassicTokenizer(name: String, maxTokenLength: Int) extends Tokenizer
case class CommonGramsTokenFilter(name: String, commonWords: Iterable[String], commonWordsPath: Option[String], ignoreCase: Option[Boolean], queryMode: Option[Boolean]) extends TokenFilter
case class CompoundWordTokenFilter(name: String, `type`: CompoundWordTokenFilterType, wordList: Iterable[String], wordListPath: Option[String], hyphenationPatternsPath: Option[String], minWordSize: Option[Int], minSubwordSize: Option[Int], maxSubwordSize: Option[Int], onlyLongestMatch: Option[Boolean]) extends TokenFilter
case class CustomAnalyzer(name: String, tokenizer: String, charFilters: List[String], tokenFilters: List[String], positionIncrementGap: Int) extends Analyzer

When the built-in analyzers do not fulfil your needs, you can create a custom analyzer which uses the appropriate combination of:

- zero or more character filters
- a tokenizer
- zero or more token filters

Reference these by name, and if they are custom or configurable add them to the analysis definition.

case class CustomNormalizer(name: String, charFilters: List[String], tokenFilters: List[String]) extends Normalizer
case class EdgeNGramTokenFilter(name: String, minGram: Int, maxGram: Int, side: Option[String]) extends TokenFilter
case class EdgeNGramTokenizer(name: String, minGram: Int, maxGram: Int, tokenChars: Iterable[String]) extends Tokenizer
case class ElisionTokenFilter(name: String, articles: Seq[String]) extends TokenFilter
case class FingerprintAnalyzer(name: String, separator: Option[String], stopwords: Iterable[String], maxOutputSize: Int) extends Analyzer
case class HunspellTokenFilter(name: String, locale: String, dedup: Option[Boolean], longestOnly: Option[Boolean], dictionary: Option[String]) extends TokenFilter
case class ICUTokenizer(name: String, ruleFiles: String) extends Tokenizer
case class KeywordMarkerTokenFilter(name: String, keywords: Seq[String], keywordsPath: Option[String], keywordsPattern: Option[String], ignoreCase: Option[Boolean]) extends TokenFilter
case class KeywordTokenizer(name: String, bufferSize: Int) extends Tokenizer
case class LengthTokenFilter(name: String, min: Option[Int], max: Option[Int]) extends TokenFilter
case class LimitTokenCountTokenFilter(name: String, maxTokenCount: Option[Int], consumeAllTokens: Option[Boolean]) extends TokenFilter
case class MappingCharFilter(name: String, mappings: Map[String, String]) extends CharFilter
case class MultiplexerTokenFilter(name: String, filters: Seq[String], preserveOriginal: Option[Boolean]) extends TokenFilter
case class NGramTokenFilter(name: String, minGram: Option[Int], maxGram: Option[Int]) extends TokenFilter
case class NGramTokenizer(name: String, minGram: Int, maxGram: Int, tokenChars: Iterable[String]) extends Tokenizer
case class PathHierarchyTokenizer(name: String, delimiter: Char, replacement: Char, bufferSize: Int, reverse: Boolean, skip: Int) extends Tokenizer
case class PatternAnalyzer(name: String, regex: String, lowercase: Boolean) extends Analyzer
case class PatternCaptureTokenFilter(name: String, patterns: Seq[String], preserveOriginal: Boolean) extends TokenFilter
case class PatternReplaceCharFilter(name: String, pattern: String, replacement: String) extends CharFilter
case class PatternReplaceTokenFilter(name: String, pattern: String, replacement: String) extends TokenFilter
case class PatternTokenizer(name: String, pattern: String, flags: String, group: Int) extends Tokenizer
case class ShingleTokenFilter(name: String, maxShingleSize: Option[Int], minShingleSize: Option[Int], outputUnigrams: Option[Boolean], outputUnigramsIfNoShingles: Option[Boolean], tokenSeparator: Option[String], fillerToken: Option[String]) extends TokenFilter
case class SnowballTokenFilter(name: String, language: String) extends TokenFilter
case class StandardAnalyzer(name: String, stopwords: Iterable[String], maxTokenLength: Int) extends Analyzer
case class StandardTokenizer(name: String, maxTokenLength: Int) extends Tokenizer
case class StemmerOverrideTokenFilter(name: String, rules: Seq[String], rulesPath: Option[String]) extends TokenFilter
case class StemmerTokenFilter(name: String, lang: String) extends TokenFilter
case class StopAnalyzer(name: String, stopwords: Iterable[String]) extends Analyzer
case class StopTokenFilter(name: String, language: Option[String], stopwords: Iterable[String], stopwordsPath: Option[String], enablePositionIncrements: Option[Boolean], removeTrailing: Option[Boolean], ignoreCase: Option[Boolean]) extends TokenFilter
case class SynonymGraphTokenFilter(name: String, path: Option[String], synonyms: Set[String], ignoreCase: Option[Boolean], format: Option[String], expand: Option[Boolean], tokenizer: Option[String]) extends TokenFilter
case class SynonymTokenFilter(name: String, path: Option[String], synonyms: Set[String], ignoreCase: Option[Boolean], format: Option[String], expand: Option[Boolean], tokenizer: Option[String]) extends TokenFilter
case class ThaiTokenizer(name: String) extends Tokenizer
trait Tokenizer
case class TruncateTokenFilter(name: String, length: Int) extends TokenFilter
case class UaxUrlEmailTokenizer(name: String, maxTokenLength: Int) extends Tokenizer
case class UniqueTokenFilter(name: String, onlyOnSamePosition: Boolean) extends TokenFilter
case class WhitespaceTokenizer(name: String, maxTokenLength: Int) extends Tokenizer
case class WordDelimiterGraphTokenFilter(name: String, preserveOriginal: Option[Boolean], catenateNumbers: Option[Boolean], catenateWords: Option[Boolean], catenateAll: Option[Boolean], generateWordParts: Option[Boolean], generateNumberParts: Option[Boolean], protectedWords: Option[String], protectedWordsPath: Option[String], splitOnCaseChange: Option[Boolean], splitOnNumerics: Option[Boolean], stem_english_possessive: Option[Boolean], typeTable: Option[String], typeTablePath: Option[String]) extends TokenFilter
case class WordDelimiterTokenFilter(name: String, generateWordParts: Option[Boolean], generateNumberParts: Option[Boolean], catenateWords: Option[Boolean], catenateNumbers: Option[Boolean], catenateAll: Option[Boolean], splitOnCaseChange: Option[Boolean], protectedWords: Option[String], protectedWordsPath: Option[String], preserveOriginal: Option[Boolean], splitOnNumerics: Option[Boolean], stemEnglishPossesive: Option[Boolean], typeTable: Option[String], typeTablePath: Option[String]) extends TokenFilter