public abstract class AbstractTermSuiteCollectionReader
extends org.apache.uima.collection.CollectionReader_ImplBase
CollectionException
Abstract collection reader implementation for TermSuite that
recursively loads all selected files from an input directory, with a customizable file filter
and document text parser.
of an input
Modifier and Type | Field and Description |
---|---|
protected TermSuiteCollection |
collectionType |
protected java.lang.String[] |
droppedTags |
static java.lang.String |
PARAM_COLLECTION_TYPE |
static java.lang.String |
PARAM_DROPPED_TAGS |
static java.lang.String |
PARAM_ENCODING |
static java.lang.String |
PARAM_INPUTDIR |
static java.lang.String |
PARAM_LANGUAGE |
static java.lang.String |
PARAM_TXT_TAGS |
protected java.lang.String[] |
txtTags |
Constructor and Description |
---|
AbstractTermSuiteCollectionReader() |
Modifier and Type | Method and Description |
---|---|
void |
close() |
protected void |
fillCas(org.apache.uima.cas.CAS cas,
java.io.File file) |
protected abstract java.lang.String |
getDocumentText(java.lang.String uri,
java.lang.String encoding)
Gives the document text to set from the input file URI.
|
protected java.io.FilenameFilter |
getFileFilter()
The FilenameFilter for selecting input files to read. |
java.util.List<java.io.File> |
getFiles() |
void |
getNext(org.apache.uima.cas.CAS cas) |
org.apache.uima.util.Progress[] |
getProgress() |
boolean |
hasNext() |
void |
initialize() |
protected void |
lastFileRead()
A hook that is executed after the last input file has been read.
|
destroy, getCasInitializer, getProcessingResourceMetaData, initialize, isConsuming, reconfigure, setCasInitializer, typeSystemInit
getConfigParameterValue, getConfigParameterValue, setConfigParameterValue, setConfigParameterValue
getCasManager, getLogger, getMetaData, getResourceManager, getUimaContext, getUimaContextAdmin, setLogger, setMetaData
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
public static final java.lang.String PARAM_INPUTDIR
public static final java.lang.String PARAM_ENCODING
public static final java.lang.String PARAM_LANGUAGE
public static final java.lang.String PARAM_COLLECTION_TYPE
protected TermSuiteCollection collectionType
public static final java.lang.String PARAM_DROPPED_TAGS
protected java.lang.String[] droppedTags
public static final java.lang.String PARAM_TXT_TAGS
protected java.lang.String[] txtTags
public void initialize() throws org.apache.uima.resource.ResourceInitializationException
initialize
in class org.apache.uima.collection.CollectionReader_ImplBase
org.apache.uima.resource.ResourceInitializationException
public void getNext(org.apache.uima.cas.CAS cas) throws java.io.IOException, org.apache.uima.collection.CollectionException
java.io.IOException
org.apache.uima.collection.CollectionException
protected void fillCas(org.apache.uima.cas.CAS cas, java.io.File file) throws java.io.IOException, org.apache.uima.collection.CollectionException
java.io.IOException
org.apache.uima.collection.CollectionException
public boolean hasNext() throws java.io.IOException, org.apache.uima.collection.CollectionException
java.io.IOException
org.apache.uima.collection.CollectionException
public org.apache.uima.util.Progress[] getProgress()
public void close() throws java.io.IOException
java.io.IOException
public java.util.List<java.io.File> getFiles()
protected java.io.FilenameFilter getFileFilter()
The FilenameFilter for selecting input files to read.
protected abstract java.lang.String getDocumentText(java.lang.String uri, java.lang.String encoding) throws java.io.IOException
uri
- java.io.IOException
protected void lastFileRead()