public class Analysis extends Object
Modifier and Type | Field and Description |
---|---|
static com.google.common.collect.ImmutableMap<String,Set<?>> |
namedStopWords |
Constructor and Description |
---|
Analysis() |
Modifier and Type | Method and Description |
---|---|
static boolean |
generatesCharacterTokenStream(org.apache.lucene.analysis.Analyzer analyzer,
String fieldName)
Check whether the TokenStreams generated with analyzer
provide character terms. |
static Reader |
getReaderFromFile(Environment env,
Settings settings,
String settingPrefix) |
static List<String> |
getWordList(Environment env,
Settings settings,
String settingPrefix)
Fetches a list of words from the specified settings file.
|
static org.apache.lucene.analysis.util.CharArraySet |
getWordSet(Environment env,
Settings settings,
String settingsPrefix) |
static boolean |
isCharacterTokenStream(org.apache.lucene.analysis.TokenStream tokenStream)
Check whether the provided token stream is able to provide character
terms.
|
static boolean |
isNoStopwords(Settings settings) |
static List<String> |
loadWordList(Reader reader,
String comment) |
static org.apache.lucene.util.Version |
parseAnalysisVersion(Settings indexSettings,
Settings settings,
ESLogger logger) |
static org.apache.lucene.analysis.util.CharArraySet |
parseArticles(Environment env,
Settings settings) |
static org.apache.lucene.analysis.util.CharArraySet |
parseCommonWords(Environment env,
Settings settings,
org.apache.lucene.analysis.util.CharArraySet defaultCommonWords,
boolean ignoreCase) |
static org.apache.lucene.analysis.util.CharArraySet |
parseStemExclusion(Settings settings,
org.apache.lucene.analysis.util.CharArraySet defaultStemExclusion) |
static org.apache.lucene.analysis.util.CharArraySet |
parseStopWords(Environment env,
Settings settings,
org.apache.lucene.analysis.util.CharArraySet defaultStopWords) |
static org.apache.lucene.analysis.util.CharArraySet |
parseStopWords(Environment env,
Settings settings,
org.apache.lucene.analysis.util.CharArraySet defaultStopWords,
boolean ignoreCase) |
static org.apache.lucene.analysis.util.CharArraySet |
parseWords(Environment env,
Settings settings,
String name,
org.apache.lucene.analysis.util.CharArraySet defaultWords,
Map<String,Set<?>> namedWords,
boolean ignoreCase) |
public static org.apache.lucene.util.Version parseAnalysisVersion(Settings indexSettings, Settings settings, ESLogger logger)
public static boolean isNoStopwords(Settings settings)
public static org.apache.lucene.analysis.util.CharArraySet parseStemExclusion(Settings settings, org.apache.lucene.analysis.util.CharArraySet defaultStemExclusion)
public static org.apache.lucene.analysis.util.CharArraySet parseWords(Environment env, Settings settings, String name, org.apache.lucene.analysis.util.CharArraySet defaultWords, Map<String,Set<?>> namedWords, boolean ignoreCase)
public static org.apache.lucene.analysis.util.CharArraySet parseCommonWords(Environment env, Settings settings, org.apache.lucene.analysis.util.CharArraySet defaultCommonWords, boolean ignoreCase)
public static org.apache.lucene.analysis.util.CharArraySet parseArticles(Environment env, Settings settings)
public static org.apache.lucene.analysis.util.CharArraySet parseStopWords(Environment env, Settings settings, org.apache.lucene.analysis.util.CharArraySet defaultStopWords)
public static org.apache.lucene.analysis.util.CharArraySet parseStopWords(Environment env, Settings settings, org.apache.lucene.analysis.util.CharArraySet defaultStopWords, boolean ignoreCase)
public static org.apache.lucene.analysis.util.CharArraySet getWordSet(Environment env, Settings settings, String settingsPrefix)
public static List<String> getWordList(Environment env, Settings settings, String settingPrefix)
Throws: IllegalArgumentException - If the word list cannot be found at either key.
public static List<String> loadWordList(Reader reader, String comment) throws IOException
Throws: IOException
public static Reader getReaderFromFile(Environment env, Settings settings, String settingPrefix)
null.
Throws: IllegalArgumentException - If the Reader cannot be instantiated.
public static boolean isCharacterTokenStream(org.apache.lucene.analysis.TokenStream tokenStream)
Although most analyzers generate character terms (CharTermAttribute),
some token streams only contain binary terms (BinaryTermAttribute,
CharTermAttribute being a special type of BinaryTermAttribute), such as
NumericTokenStream, and are unsuitable for highlighting and
more-like-this queries, which expect character terms.
public static boolean generatesCharacterTokenStream(org.apache.lucene.analysis.Analyzer analyzer, String fieldName) throws IOException
Check whether the TokenStreams generated with analyzer provide character terms.
Throws: IOException
See Also: isCharacterTokenStream(TokenStream)
Copyright © 2009–2017. All rights reserved.