public abstract class BaseWARCWriterProcessor extends WriterPoolProcessor implements org.archive.io.warc.WARCWriterPoolSettings
Modifier and Type | Field and Description |
---|---|
protected org.archive.uid.RecordIDGenerator | generator: Generator for record IDs |
protected ConcurrentMap<String,ConcurrentMap<String,AtomicLong>> | stats |
protected AtomicLong | urlsWritten |
Fields inherited from class WriterPoolProcessor: ANNOTATION_UNWRITTEN, compress, directory, frequentFlushes, maxFileSizeBytes, maxTotalBytesToWrite, maxWaitForIdleMs, poolMaxActive, prefix, serverCache, skipIdenticalDigests, startNewFilesOnCheckpoint, storePaths, template, writeBufferSize
Constructor and Description |
---|
BaseWARCWriterProcessor() |
Modifier and Type | Method and Description |
---|---|
protected void | addIfNotBlank(org.archive.util.anvl.ANVLRecord record, String label, String value) |
protected void | addStats(Map<String,Map<String,Long>> substats) |
protected Map<String,Map<String,Long>> | copyStats(Map<String,Map<String,Long>> orig) |
long | getDefaultMaxFileSize() |
List<ConfigPath> | getDefaultStorePaths() |
List<String> | getMetadata() |
protected URI | getRecordID() |
org.archive.uid.RecordIDGenerator | getRecordIDGenerator() |
ConcurrentMap<String,ConcurrentMap<String,AtomicLong>> | getStats() |
String | report() |
void | setRecordIDGenerator(org.archive.uid.RecordIDGenerator generator) |
protected void | setupPool(AtomicInteger serialNo): Set up pool of files. |
protected void | updateMetadataAfterWrite(CrawlURI curi, org.archive.io.warc.WARCWriter writer, long startPosition) |
Methods inherited from class WriterPoolProcessor: addTotalBytesWritten, calcOutputDirs, checkBytesWritten, copyForwardWriteTagIfDupe, doCheckpoint, fromCheckpointJson, getCompress, getDirectory, getFrequentFlushes, getHostAddress, getMaxFileSizeBytes, getMaxTotalBytesToWrite, getMaxWaitForIdleMs, getMetadataProvider, getPool, getPoolMaxActive, getPrefix, getSerialNo, getServerCache, getSkipIdenticalDigests, getStartNewFilesOnCheckpoint, getStorePaths, getTemplate, getTotalBytesWritten, getWriteBufferSize, innerProcess, innerProcessResult, innerRejectProcess, setCompress, setDirectory, setFrequentFlushes, setMaxFileSizeBytes, setMaxTotalBytesToWrite, setMaxWaitForIdleMs, setMetadataProvider, setPool, setPoolMaxActive, setPrefix, setServerCache, setSkipIdenticalDigests, setStartNewFilesOnCheckpoint, setStorePaths, setTemplate, setTotalBytesWritten, setWriteBufferSize, shouldProcess, shouldWrite, start, stop, toCheckpointJson
finishCheckpoint, flattenVia, getBeanName, getEnabled, getKeyedProperties, getRecordedSize, getShouldProcessRule, getURICount, hasHttpAuthenticationCredential, isRunning, isSuccess, process, setBeanName, setEnabled, setRecoveryCheckpoint, setShouldProcessRule, startCheckpoint
Methods inherited from class java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
calcOutputDirs, getCompress, getFrequentFlushes, getMaxFileSizeBytes, getPrefix, getTemplate, getWriteBufferSize
finishCheckpoint, setRecoveryCheckpoint, startCheckpoint
protected AtomicLong urlsWritten
protected ConcurrentMap<String,ConcurrentMap<String,AtomicLong>> stats
protected org.archive.uid.RecordIDGenerator generator
public ConcurrentMap<String,ConcurrentMap<String,AtomicLong>> getStats()
public org.archive.uid.RecordIDGenerator getRecordIDGenerator()
Specified by: getRecordIDGenerator in interface org.archive.io.warc.WARCWriterPoolSettings
public void setRecordIDGenerator(org.archive.uid.RecordIDGenerator generator)
protected URI getRecordID() throws IOException
Throws: IOException
public long getDefaultMaxFileSize()
getDefaultMaxFileSize
in class WriterPoolProcessor
public List<ConfigPath> getDefaultStorePaths()
getDefaultStorePaths
in class WriterPoolProcessor
protected void setupPool(AtomicInteger serialNo)
Description copied from class: WriterPoolProcessor
Set up pool of files.
setupPool
in class WriterPoolProcessor
public List<String> getMetadata()
Specified by: getMetadata in interface WriterPoolSettings
getMetadata
in class WriterPoolProcessor
protected void addIfNotBlank(org.archive.util.anvl.ANVLRecord record, String label, String value)
protected void updateMetadataAfterWrite(CrawlURI curi, org.archive.io.warc.WARCWriter writer, long startPosition)
Copyright © 2003–2022 Internet Archive. All rights reserved.