Class

com.johnsnowlabs.nlp.util.io

CustomStripper

Related Doc: package io

Permalink

class CustomStripper extends PDFTextStripper

Linear Supertypes
PDFTextStripper, LegacyPDFStreamEngine, PDFStreamEngine, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. CustomStripper
  2. PDFTextStripper
  3. LegacyPDFStreamEngine
  4. PDFStreamEngine
  5. AnyRef
  6. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new CustomStripper()

    Permalink

Value Members

  1. final def !=(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  4. final def addOperator(arg0: OperatorProcessor): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
  5. def applyTextAdjustment(arg0: Float, arg1: Float): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  6. final def asInstanceOf[T0]: T0

    Permalink
    Definition Classes
    Any
  7. def beginText(): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  8. def clone(): AnyRef

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.CloneNotSupportedException] )
  9. def endArticle(): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  10. def endDocument(arg0: PDDocument): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  11. def endPage(arg0: PDPage): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  12. def endText(): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  13. final def eq(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  14. def equals(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  15. def finalize(): Unit

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  16. def getAddMoreFormatting(): Boolean

    Permalink
    Definition Classes
    PDFTextStripper
  17. def getAppearance(arg0: PDAnnotation): PDAppearanceStream

    Permalink
    Definition Classes
    PDFStreamEngine
  18. def getArticleEnd(): String

    Permalink
    Definition Classes
    PDFTextStripper
  19. def getArticleStart(): String

    Permalink
    Definition Classes
    PDFTextStripper
  20. def getAverageCharTolerance(): Float

    Permalink
    Definition Classes
    PDFTextStripper
  21. def getCharactersByArticle(): List[List[TextPosition]]

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
  22. final def getClass(): Class[_]

    Permalink
    Definition Classes
    AnyRef → Any
  23. def getCurrentPage(): PDPage

    Permalink
    Definition Classes
    PDFStreamEngine
  24. def getCurrentPageNo(): Int

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
  25. def getDropThreshold(): Float

    Permalink
    Definition Classes
    PDFTextStripper
  26. def getEndBookmark(): PDOutlineItem

    Permalink
    Definition Classes
    PDFTextStripper
  27. def getEndPage(): Int

    Permalink
    Definition Classes
    PDFTextStripper
  28. def getGraphicsStackSize(): Int

    Permalink
    Definition Classes
    PDFStreamEngine
  29. def getGraphicsState(): PDGraphicsState

    Permalink
    Definition Classes
    PDFStreamEngine
  30. def getIndentThreshold(): Float

    Permalink
    Definition Classes
    PDFTextStripper
  31. def getInitialMatrix(): Matrix

    Permalink
    Definition Classes
    PDFStreamEngine
  32. def getLineSeparator(): String

    Permalink
    Definition Classes
    PDFTextStripper
  33. def getListItemPatterns(): List[Pattern]

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
  34. def getOutput(): Writer

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
  35. def getPageEnd(): String

    Permalink
    Definition Classes
    PDFTextStripper
  36. def getPageStart(): String

    Permalink
    Definition Classes
    PDFTextStripper
  37. def getParagraphEnd(): String

    Permalink
    Definition Classes
    PDFTextStripper
  38. def getParagraphStart(): String

    Permalink
    Definition Classes
    PDFTextStripper
  39. def getResources(): PDResources

    Permalink
    Definition Classes
    PDFStreamEngine
  40. def getSeparateByBeads(): Boolean

    Permalink
    Definition Classes
    PDFTextStripper
  41. def getSortByPosition(): Boolean

    Permalink
    Definition Classes
    PDFTextStripper
  42. def getSpacingTolerance(): Float

    Permalink
    Definition Classes
    PDFTextStripper
  43. def getStartBookmark(): PDOutlineItem

    Permalink
    Definition Classes
    PDFTextStripper
  44. def getStartPage(): Int

    Permalink
    Definition Classes
    PDFTextStripper
  45. def getSuppressDuplicateOverlappingText(): Boolean

    Permalink
    Definition Classes
    PDFTextStripper
  46. def getText(doc: PDDocument): String

    Permalink
    Definition Classes
    CustomStripper → PDFTextStripper
  47. def getTextLineMatrix(): Matrix

    Permalink
    Definition Classes
    PDFStreamEngine
  48. def getTextMatrix(): Matrix

    Permalink
    Definition Classes
    PDFStreamEngine
  49. def getWordSeparator(): String

    Permalink
    Definition Classes
    PDFTextStripper
  50. def hashCode(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  51. final def isInstanceOf[T0]: Boolean

    Permalink
    Definition Classes
    Any
  52. final def ne(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  53. final def notify(): Unit

    Permalink
    Definition Classes
    AnyRef
  54. final def notifyAll(): Unit

    Permalink
    Definition Classes
    AnyRef
  55. def operatorException(arg0: Operator, arg1: List[COSBase], arg2: IOException): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  56. def processAnnotation(arg0: PDAnnotation, arg1: PDAppearanceStream): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  57. def processChildStream(arg0: PDContentStream, arg1: PDPage): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  58. def processOperator(arg0: Operator, arg1: List[COSBase]): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  59. def processOperator(arg0: String, arg1: List[COSBase]): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  60. def processPage(arg0: PDPage): Unit

    Permalink
    Definition Classes
    PDFTextStripper → LegacyPDFStreamEngine → PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  61. def processPages(arg0: PDPageTree): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  62. def processSoftMask(arg0: PDTransparencyGroup): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  63. def processTextPosition(arg0: TextPosition): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper → LegacyPDFStreamEngine
  64. final def processTilingPattern(arg0: PDTilingPattern, arg1: PDColor, arg2: PDColorSpace, arg3: Matrix): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  65. final def processTilingPattern(arg0: PDTilingPattern, arg1: PDColor, arg2: PDColorSpace): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  66. def processTransparencyGroup(arg0: PDTransparencyGroup): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  67. def processType3Stream(arg0: PDType3CharProc, arg1: Matrix): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  68. final def restoreGraphicsStack(arg0: Stack[PDGraphicsState]): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
  69. def restoreGraphicsState(): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
  70. final def saveGraphicsStack(): Stack[PDGraphicsState]

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
  71. def saveGraphicsState(): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
  72. def setAddMoreFormatting(arg0: Boolean): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  73. def setArticleEnd(arg0: String): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  74. def setArticleStart(arg0: String): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  75. def setAverageCharTolerance(arg0: Float): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  76. def setDropThreshold(arg0: Float): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  77. def setEndBookmark(arg0: PDOutlineItem): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  78. def setEndPage(arg0: Int): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  79. def setIndentThreshold(arg0: Float): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  80. def setLineDashPattern(arg0: COSArray, arg1: Int): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
  81. def setLineSeparator(arg0: String): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  82. def setListItemPatterns(arg0: List[Pattern]): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
  83. def setPageEnd(arg0: String): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  84. def setPageStart(arg0: String): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  85. def setParagraphEnd(arg0: String): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  86. def setParagraphStart(arg0: String): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  87. def setShouldSeparateByBeads(arg0: Boolean): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  88. def setSortByPosition(arg0: Boolean): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  89. def setSpacingTolerance(arg0: Float): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  90. def setStartBookmark(arg0: PDOutlineItem): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  91. def setStartPage(arg0: Int): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  92. def setSuppressDuplicateOverlappingText(arg0: Boolean): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  93. def setTextLineMatrix(arg0: Matrix): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
  94. def setTextMatrix(arg0: Matrix): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
  95. def setWordSeparator(arg0: String): Unit

    Permalink
    Definition Classes
    PDFTextStripper
  96. def showAnnotation(arg0: PDAnnotation): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  97. def showFontGlyph(arg0: Matrix, arg1: PDFont, arg2: Int, arg3: String, arg4: Vector): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  98. def showForm(arg0: PDFormXObject): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  99. def showGlyph(arg0: Matrix, arg1: PDFont, arg2: Int, arg3: String, arg4: Vector): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    LegacyPDFStreamEngine → PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  100. def showText(arg0: Array[Byte]): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  101. def showTextString(arg0: Array[Byte]): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  102. def showTextStrings(arg0: COSArray): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  103. def showTransparencyGroup(arg0: PDTransparencyGroup): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  104. def showType3Glyph(arg0: Matrix, arg1: PDType3Font, arg2: Int, arg3: String, arg4: Vector): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  105. def startArticle(arg0: Boolean): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  106. def startArticle(): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  107. def startDocument(arg0: PDDocument): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  108. def startPage(page: PDPage): Unit

    Permalink
    Attributes
    protected[com.johnsnowlabs.nlp.util.io]
    Definition Classes
    CustomStripper → PDFTextStripper
  109. final def synchronized[T0](arg0: ⇒ T0): T0

    Permalink
    Definition Classes
    AnyRef
  110. def toString(): String

    Permalink
    Definition Classes
    AnyRef → Any
  111. def transformWidth(arg0: Float): Float

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
  112. def transformedPoint(arg0: Float, arg1: Float): Point2D.Float

    Permalink
    Definition Classes
    PDFStreamEngine
  113. def unsupportedOperator(arg0: Operator, arg1: List[COSBase]): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.contentstream]
    Definition Classes
    PDFStreamEngine
    Annotations
    @throws( classOf[java.io.IOException] )
  114. final def wait(): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.InterruptedException] )
  115. final def wait(arg0: Long, arg1: Int): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.InterruptedException] )
  116. final def wait(arg0: Long): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.InterruptedException] )
  117. def writeCharacters(arg0: TextPosition): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  118. def writeLineSeparator(): Unit

    Permalink
    Attributes
    protected[com.johnsnowlabs.nlp.util.io]
    Definition Classes
    CustomStripper → PDFTextStripper
  119. def writePage(): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  120. def writePageEnd(): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  121. def writePageStart(): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  122. def writeParagraphEnd(): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  123. def writeParagraphSeparator(): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  124. def writeParagraphStart(): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  125. def writeString(text: String, textPositions: List[TextPosition]): Unit

    Permalink
    Attributes
    protected[com.johnsnowlabs.nlp.util.io]
    Definition Classes
    CustomStripper → PDFTextStripper
  126. def writeString(arg0: String): Unit

    Permalink
    Attributes
    protected[org.apache.pdfbox.text]
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  127. def writeText(arg0: PDDocument, arg1: Writer): Unit

    Permalink
    Definition Classes
    PDFTextStripper
    Annotations
    @throws( classOf[java.io.IOException] )
  128. def writeWordSeparator(): Unit

    Permalink
    Attributes
    protected[com.johnsnowlabs.nlp.util.io]
    Definition Classes
    CustomStripper → PDFTextStripper

Deprecated Value Members

  1. def registerOperatorProcessor(arg0: String, arg1: OperatorProcessor): Unit

    Permalink
    Definition Classes
    PDFStreamEngine
    Annotations
    @Deprecated @deprecated
    Deprecated

    (Since version unspecified) See the corresponding Javadoc for more information.

Inherited from PDFTextStripper

Inherited from LegacyPDFStreamEngine

Inherited from PDFStreamEngine

Inherited from AnyRef

Inherited from Any

Ungrouped