public class GenericXMLToTxtCollectionReader extends AbstractTermSuiteCollectionReader
collectionType, droppedTags, PARAM_COLLECTION_TYPE, PARAM_DROPPED_TAGS, PARAM_ENCODING, PARAM_INPUTDIR, PARAM_LANGUAGE, PARAM_TXT_TAGS, txtTags
Constructor and Description |
---|
GenericXMLToTxtCollectionReader() |
Modifier and Type | Method and Description |
---|---|
protected java.lang.String |
getDocumentText(java.lang.String absolutePath,
java.lang.String encoding)
Gives the document text to set from the input file URI.
|
protected java.io.FilenameFilter |
getFileFilter()
The FilenameFilter for selecting input files to read. |
void |
initialize() |
protected void |
lastFileRead()
A hook that is executed after the last input file has been read.
|
close, fillCas, getFiles, getNext, getProgress, hasNext
destroy, getCasInitializer, getProcessingResourceMetaData, initialize, isConsuming, reconfigure, setCasInitializer, typeSystemInit
getConfigParameterValue, getConfigParameterValue, setConfigParameterValue, setConfigParameterValue
getCasManager, getLogger, getMetaData, getResourceManager, getUimaContext, getUimaContextAdmin, setLogger, setMetaData
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
public void initialize() throws org.apache.uima.resource.ResourceInitializationException
initialize
in class AbstractTermSuiteCollectionReader
org.apache.uima.resource.ResourceInitializationException
protected java.lang.String getDocumentText(java.lang.String absolutePath, java.lang.String encoding) throws java.io.IOException
AbstractTermSuiteCollectionReader
getDocumentText
in class AbstractTermSuiteCollectionReader
java.io.IOException
protected java.io.FilenameFilter getFileFilter()
AbstractTermSuiteCollectionReader
The FilenameFilter for selecting input files to read.
getFileFilter
in class AbstractTermSuiteCollectionReader
protected void lastFileRead()
AbstractTermSuiteCollectionReader
lastFileRead
in class AbstractTermSuiteCollectionReader