com.johnsnowlabs.nlp.util.io
CustomStripper
Related Doc:
package io
class
CustomStripper
extends
PDFTextStripper
Linear Supertypes
PDFTextStripper
,
LegacyPDFStreamEngine
,
PDFStreamEngine
,
AnyRef
,
Any
Ordering
Alphabetic
By Inheritance
Inherited
CustomStripper
PDFTextStripper
LegacyPDFStreamEngine
PDFStreamEngine
AnyRef
Any
Hide All
Show All
Visibility
Public
All
Instance Constructors
new
CustomStripper
()
Value Members
final
def
!=
(
arg0:
Any
)
:
Boolean
Definition Classes
AnyRef → Any
final
def
##
()
:
Int
Definition Classes
AnyRef → Any
final
def
==
(
arg0:
Any
)
:
Boolean
Definition Classes
AnyRef → Any
final
def
addOperator
(
arg0:
OperatorProcessor
)
:
Unit
Definition Classes
PDFStreamEngine
def
applyTextAdjustment
(
arg0:
Float
,
arg1:
Float
)
:
Unit
Attributes
protected[
org.apache.pdfbox.contentstream
]
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
final
def
asInstanceOf
[
T0
]
:
T0
Definition Classes
Any
def
beginText
()
:
Unit
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
def
clone
()
:
AnyRef
Attributes
protected[
java.lang
]
Definition Classes
AnyRef
Annotations
@throws
(
...
)
def
endArticle
()
:
Unit
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
Annotations
@throws
(
classOf[java.io.IOException]
)
def
endDocument
(
arg0:
PDDocument
)
:
Unit
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
Annotations
@throws
(
classOf[java.io.IOException]
)
def
endPage
(
arg0:
PDPage
)
:
Unit
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
Annotations
@throws
(
classOf[java.io.IOException]
)
def
endText
()
:
Unit
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
final
def
eq
(
arg0:
AnyRef
)
:
Boolean
Definition Classes
AnyRef
def
equals
(
arg0:
Any
)
:
Boolean
Definition Classes
AnyRef → Any
def
finalize
()
:
Unit
Attributes
protected[
java.lang
]
Definition Classes
AnyRef
Annotations
@throws
(
classOf[java.lang.Throwable]
)
def
getAddMoreFormatting
()
:
Boolean
Definition Classes
PDFTextStripper
def
getAppearance
(
arg0:
PDAnnotation
)
:
PDAppearanceStream
Definition Classes
PDFStreamEngine
def
getArticleEnd
()
:
String
Definition Classes
PDFTextStripper
def
getArticleStart
()
:
String
Definition Classes
PDFTextStripper
def
getAverageCharTolerance
()
:
Float
Definition Classes
PDFTextStripper
def
getCharactersByArticle
()
:
List
[
List
[
TextPosition
]]
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
final
def
getClass
()
:
Class
[_]
Definition Classes
AnyRef → Any
def
getCurrentPage
()
:
PDPage
Definition Classes
PDFStreamEngine
def
getCurrentPageNo
()
:
Int
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
def
getDropThreshold
()
:
Float
Definition Classes
PDFTextStripper
def
getEndBookmark
()
:
PDOutlineItem
Definition Classes
PDFTextStripper
def
getEndPage
()
:
Int
Definition Classes
PDFTextStripper
def
getGraphicsStackSize
()
:
Int
Definition Classes
PDFStreamEngine
def
getGraphicsState
()
:
PDGraphicsState
Definition Classes
PDFStreamEngine
def
getIndentThreshold
()
:
Float
Definition Classes
PDFTextStripper
def
getInitialMatrix
()
:
Matrix
Definition Classes
PDFStreamEngine
def
getLineSeparator
()
:
String
Definition Classes
PDFTextStripper
def
getListItemPatterns
()
:
List
[
Pattern
]
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
def
getOutput
()
:
Writer
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
def
getPageEnd
()
:
String
Definition Classes
PDFTextStripper
def
getPageStart
()
:
String
Definition Classes
PDFTextStripper
def
getParagraphEnd
()
:
String
Definition Classes
PDFTextStripper
def
getParagraphStart
()
:
String
Definition Classes
PDFTextStripper
def
getResources
()
:
PDResources
Definition Classes
PDFStreamEngine
def
getSeparateByBeads
()
:
Boolean
Definition Classes
PDFTextStripper
def
getSortByPosition
()
:
Boolean
Definition Classes
PDFTextStripper
def
getSpacingTolerance
()
:
Float
Definition Classes
PDFTextStripper
def
getStartBookmark
()
:
PDOutlineItem
Definition Classes
PDFTextStripper
def
getStartPage
()
:
Int
Definition Classes
PDFTextStripper
def
getSuppressDuplicateOverlappingText
()
:
Boolean
Definition Classes
PDFTextStripper
def
getText
(
doc:
PDDocument
)
:
String
Definition Classes
CustomStripper
→ PDFTextStripper
def
getTextLineMatrix
()
:
Matrix
Definition Classes
PDFStreamEngine
def
getTextMatrix
()
:
Matrix
Definition Classes
PDFStreamEngine
def
getWordSeparator
()
:
String
Definition Classes
PDFTextStripper
def
hashCode
()
:
Int
Definition Classes
AnyRef → Any
final
def
isInstanceOf
[
T0
]
:
Boolean
Definition Classes
Any
final
def
ne
(
arg0:
AnyRef
)
:
Boolean
Definition Classes
AnyRef
final
def
notify
()
:
Unit
Definition Classes
AnyRef
final
def
notifyAll
()
:
Unit
Definition Classes
AnyRef
def
operatorException
(
arg0:
Operator
,
arg1:
List
[
COSBase
]
,
arg2:
IOException
)
:
Unit
Attributes
protected[
org.apache.pdfbox.contentstream
]
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
def
processAnnotation
(
arg0:
PDAnnotation
,
arg1:
PDAppearanceStream
)
:
Unit
Attributes
protected[
org.apache.pdfbox.contentstream
]
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
def
processChildStream
(
arg0:
PDContentStream
,
arg1:
PDPage
)
:
Unit
Attributes
protected[
org.apache.pdfbox.contentstream
]
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
def
processOperator
(
arg0:
Operator
,
arg1:
List
[
COSBase
]
)
:
Unit
Attributes
protected[
org.apache.pdfbox.contentstream
]
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
def
processOperator
(
arg0:
String
,
arg1:
List
[
COSBase
]
)
:
Unit
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
def
processPage
(
arg0:
PDPage
)
:
Unit
Definition Classes
PDFTextStripper → LegacyPDFStreamEngine → PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
def
processPages
(
arg0:
PDPageTree
)
:
Unit
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
Annotations
@throws
(
classOf[java.io.IOException]
)
def
processSoftMask
(
arg0:
PDTransparencyGroup
)
:
Unit
Attributes
protected[
org.apache.pdfbox.contentstream
]
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
def
processTextPosition
(
arg0:
TextPosition
)
:
Unit
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper → LegacyPDFStreamEngine
final
def
processTilingPattern
(
arg0:
PDTilingPattern
,
arg1:
PDColor
,
arg2:
PDColorSpace
,
arg3:
Matrix
)
:
Unit
Attributes
protected[
org.apache.pdfbox.contentstream
]
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
final
def
processTilingPattern
(
arg0:
PDTilingPattern
,
arg1:
PDColor
,
arg2:
PDColorSpace
)
:
Unit
Attributes
protected[
org.apache.pdfbox.contentstream
]
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
def
processTransparencyGroup
(
arg0:
PDTransparencyGroup
)
:
Unit
Attributes
protected[
org.apache.pdfbox.contentstream
]
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
def
processType3Stream
(
arg0:
PDType3CharProc
,
arg1:
Matrix
)
:
Unit
Attributes
protected[
org.apache.pdfbox.contentstream
]
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
final
def
restoreGraphicsStack
(
arg0:
Stack
[
PDGraphicsState
]
)
:
Unit
Attributes
protected[
org.apache.pdfbox.contentstream
]
Definition Classes
PDFStreamEngine
def
restoreGraphicsState
()
:
Unit
Definition Classes
PDFStreamEngine
final
def
saveGraphicsStack
()
:
Stack
[
PDGraphicsState
]
Attributes
protected[
org.apache.pdfbox.contentstream
]
Definition Classes
PDFStreamEngine
def
saveGraphicsState
()
:
Unit
Definition Classes
PDFStreamEngine
def
setAddMoreFormatting
(
arg0:
Boolean
)
:
Unit
Definition Classes
PDFTextStripper
def
setArticleEnd
(
arg0:
String
)
:
Unit
Definition Classes
PDFTextStripper
def
setArticleStart
(
arg0:
String
)
:
Unit
Definition Classes
PDFTextStripper
def
setAverageCharTolerance
(
arg0:
Float
)
:
Unit
Definition Classes
PDFTextStripper
def
setDropThreshold
(
arg0:
Float
)
:
Unit
Definition Classes
PDFTextStripper
def
setEndBookmark
(
arg0:
PDOutlineItem
)
:
Unit
Definition Classes
PDFTextStripper
def
setEndPage
(
arg0:
Int
)
:
Unit
Definition Classes
PDFTextStripper
def
setIndentThreshold
(
arg0:
Float
)
:
Unit
Definition Classes
PDFTextStripper
def
setLineDashPattern
(
arg0:
COSArray
,
arg1:
Int
)
:
Unit
Definition Classes
PDFStreamEngine
def
setLineSeparator
(
arg0:
String
)
:
Unit
Definition Classes
PDFTextStripper
def
setListItemPatterns
(
arg0:
List
[
Pattern
]
)
:
Unit
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
def
setPageEnd
(
arg0:
String
)
:
Unit
Definition Classes
PDFTextStripper
def
setPageStart
(
arg0:
String
)
:
Unit
Definition Classes
PDFTextStripper
def
setParagraphEnd
(
arg0:
String
)
:
Unit
Definition Classes
PDFTextStripper
def
setParagraphStart
(
arg0:
String
)
:
Unit
Definition Classes
PDFTextStripper
def
setShouldSeparateByBeads
(
arg0:
Boolean
)
:
Unit
Definition Classes
PDFTextStripper
def
setSortByPosition
(
arg0:
Boolean
)
:
Unit
Definition Classes
PDFTextStripper
def
setSpacingTolerance
(
arg0:
Float
)
:
Unit
Definition Classes
PDFTextStripper
def
setStartBookmark
(
arg0:
PDOutlineItem
)
:
Unit
Definition Classes
PDFTextStripper
def
setStartPage
(
arg0:
Int
)
:
Unit
Definition Classes
PDFTextStripper
def
setSuppressDuplicateOverlappingText
(
arg0:
Boolean
)
:
Unit
Definition Classes
PDFTextStripper
def
setTextLineMatrix
(
arg0:
Matrix
)
:
Unit
Definition Classes
PDFStreamEngine
def
setTextMatrix
(
arg0:
Matrix
)
:
Unit
Definition Classes
PDFStreamEngine
def
setWordSeparator
(
arg0:
String
)
:
Unit
Definition Classes
PDFTextStripper
def
showAnnotation
(
arg0:
PDAnnotation
)
:
Unit
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
def
showFontGlyph
(
arg0:
Matrix
,
arg1:
PDFont
,
arg2:
Int
,
arg3:
String
,
arg4:
Vector
)
:
Unit
Attributes
protected[
org.apache.pdfbox.contentstream
]
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
def
showForm
(
arg0:
PDFormXObject
)
:
Unit
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
def
showGlyph
(
arg0:
Matrix
,
arg1:
PDFont
,
arg2:
Int
,
arg3:
String
,
arg4:
Vector
)
:
Unit
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
LegacyPDFStreamEngine → PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
def
showText
(
arg0:
Array
[
Byte
]
)
:
Unit
Attributes
protected[
org.apache.pdfbox.contentstream
]
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
def
showTextString
(
arg0:
Array
[
Byte
]
)
:
Unit
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
def
showTextStrings
(
arg0:
COSArray
)
:
Unit
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
def
showTransparencyGroup
(
arg0:
PDTransparencyGroup
)
:
Unit
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
def
showType3Glyph
(
arg0:
Matrix
,
arg1:
PDType3Font
,
arg2:
Int
,
arg3:
String
,
arg4:
Vector
)
:
Unit
Attributes
protected[
org.apache.pdfbox.contentstream
]
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
def
startArticle
(
arg0:
Boolean
)
:
Unit
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
Annotations
@throws
(
classOf[java.io.IOException]
)
def
startArticle
()
:
Unit
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
Annotations
@throws
(
classOf[java.io.IOException]
)
def
startDocument
(
arg0:
PDDocument
)
:
Unit
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
Annotations
@throws
(
classOf[java.io.IOException]
)
def
startPage
(
page:
PDPage
)
:
Unit
Attributes
protected[
com.johnsnowlabs.nlp.util.io
]
Definition Classes
CustomStripper
→ PDFTextStripper
final
def
synchronized
[
T0
]
(
arg0: ⇒
T0
)
:
T0
Definition Classes
AnyRef
def
toString
()
:
String
Definition Classes
AnyRef → Any
def
transformWidth
(
arg0:
Float
)
:
Float
Attributes
protected[
org.apache.pdfbox.contentstream
]
Definition Classes
PDFStreamEngine
def
transformedPoint
(
arg0:
Float
,
arg1:
Float
)
:
Point2D.Float
Definition Classes
PDFStreamEngine
def
unsupportedOperator
(
arg0:
Operator
,
arg1:
List
[
COSBase
]
)
:
Unit
Attributes
protected[
org.apache.pdfbox.contentstream
]
Definition Classes
PDFStreamEngine
Annotations
@throws
(
classOf[java.io.IOException]
)
final
def
wait
()
:
Unit
Definition Classes
AnyRef
Annotations
@throws
(
...
)
final
def
wait
(
arg0:
Long
,
arg1:
Int
)
:
Unit
Definition Classes
AnyRef
Annotations
@throws
(
...
)
final
def
wait
(
arg0:
Long
)
:
Unit
Definition Classes
AnyRef
Annotations
@throws
(
...
)
def
writeCharacters
(
arg0:
TextPosition
)
:
Unit
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
Annotations
@throws
(
classOf[java.io.IOException]
)
def
writeLineSeparator
()
:
Unit
Attributes
protected[
com.johnsnowlabs.nlp.util.io
]
Definition Classes
CustomStripper
→ PDFTextStripper
def
writePage
()
:
Unit
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
Annotations
@throws
(
classOf[java.io.IOException]
)
def
writePageEnd
()
:
Unit
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
Annotations
@throws
(
classOf[java.io.IOException]
)
def
writePageStart
()
:
Unit
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
Annotations
@throws
(
classOf[java.io.IOException]
)
def
writeParagraphEnd
()
:
Unit
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
Annotations
@throws
(
classOf[java.io.IOException]
)
def
writeParagraphSeparator
()
:
Unit
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
Annotations
@throws
(
classOf[java.io.IOException]
)
def
writeParagraphStart
()
:
Unit
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
Annotations
@throws
(
classOf[java.io.IOException]
)
def
writeString
(
text:
String
,
textPositions:
List
[
TextPosition
]
)
:
Unit
Attributes
protected[
com.johnsnowlabs.nlp.util.io
]
Definition Classes
CustomStripper
→ PDFTextStripper
def
writeString
(
arg0:
String
)
:
Unit
Attributes
protected[
org.apache.pdfbox.text
]
Definition Classes
PDFTextStripper
Annotations
@throws
(
classOf[java.io.IOException]
)
def
writeText
(
arg0:
PDDocument
,
arg1:
Writer
)
:
Unit
Definition Classes
PDFTextStripper
Annotations
@throws
(
classOf[java.io.IOException]
)
def
writeWordSeparator
()
:
Unit
Attributes
protected[
com.johnsnowlabs.nlp.util.io
]
Definition Classes
CustomStripper
→ PDFTextStripper
Deprecated Value Members
def
registerOperatorProcessor
(
arg0:
String
,
arg1:
OperatorProcessor
)
:
Unit
Definition Classes
PDFStreamEngine
Annotations
@Deprecated
@deprecated
Deprecated
(since version not specified)
See the corresponding Javadoc for more information.
Inherited from
PDFTextStripper
Inherited from
LegacyPDFStreamEngine
Inherited from
PDFStreamEngine
Inherited from
AnyRef
Inherited from
Any
Ungrouped