scala.xml.parsing

XhtmlParser

class XhtmlParser extends ConstructingHandler with MarkupParser with ExternalSources

An XML parser that preserves CDATA blocks and knows about HtmlEntities.

(c) David Pollak, 2007 WorldWide Conferencing, LLC.

go to: companion
source: XhtmlParser.scala

Inherited

  1. Hide All
  2. Show all
  1. ExternalSources
  2. MarkupParser
  3. MarkupParserCommon
  4. TokenTests
  5. ConstructingHandler
  6. MarkupHandler
  7. Logged
  8. AnyRef
  9. Any

Visibility

  1. Public
  2. All

Instance constructors

  1. new XhtmlParser(input: Source)

Type Members

  1. type AttributesType = (MetaData, NamespaceBinding)

  2. type ElementType = NodeSeq

  3. type InputType = Source

  4. type NamespaceType = NamespaceBinding

  5. type PositionType = Int

Value Members

  1. def !=(arg0: AnyRef): Boolean

  2. def !=(arg0: Any): Boolean

    o != arg0 is the same as !(o == (arg0)).

  3. def ##(): Int

  4. def $asInstanceOf[T0](): T0

  5. def $isInstanceOf[T0](): Boolean

  6. def ==(arg0: AnyRef): Boolean

    o == arg0 is the same as if (o eq null) arg0 eq null else o.equals(arg0).

  7. def ==(arg0: Any): Boolean

    o == arg0 is the same as o.equals(arg0).

  8. def appendText(pos: Int, ts: NodeBuffer, txt: String): Unit

  9. def asInstanceOf[T0]: T0

    This method is used to cast the receiver object to be of type T0.

  10. def attListDecl(name: String, attList: List[AttrDecl]): Unit

  11. def attrDecl(): Unit

    <! attlist := ATTLIST

  12. val cbuf: StringBuilder

    character buffer, for names

  13. var ch: Char

    holds the next character

  14. def ch_returning_nextch: Char

  15. def checkPubID(s: String): Boolean

  16. def checkSysID(s: String): Boolean

  17. def clone(): AnyRef

    This method creates and returns a copy of the receiver object.

  18. def comment(pos: Int, txt: String): Comment

    callback method invoked by MarkupParser after parsing a comment.

  19. def content(pscope: NamespaceBinding): NodeSeq

    content1 ::= '<' content1 | '&' charref ...

  20. def content1(pscope: NamespaceBinding, ts: NodeBuffer): Unit

    '<' content1 ::= ...

  21. var curInput: Source

  22. var decls: List[Decl]

  23. var doc: Document

  24. def document(): Document

    [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') [25] Eq ::= S? '=' S? [26] VersionNum ::= '1.0'

  25. var dtd: DTD

  26. def elem(pos: Int, pre: String, label: String, attrs: MetaData, pscope: NamespaceBinding, nodes: NodeSeq): NodeSeq

    callback method invoked by MarkupParser after parsing an element, between the elemStart and elemEnd callbacks

  27. def elemDecl(n: String, cmstr: String): Unit

  28. def elemEnd(pos: Int, pre: String, label: String): Unit

    callback method invoked by MarkupParser after end-tag of element.

  29. def elemStart(pos: Int, pre: String, label: String, attrs: MetaData, scope: NamespaceBinding): Unit

    callback method invoked by MarkupParser after start-tag of element.

  30. def element(pscope: NamespaceBinding): NodeSeq

  31. def element1(pscope: NamespaceBinding): NodeSeq

    '<' element ::= xmlTag1 '>' { xmlExpr | '{' simpleExpr '}' } ETag | xmlTag1 '/' '>'

  32. def elementDecl(): Unit

    <! element := ELEMENT

  33. def endDTD(n: String): Unit

  34. var ent: Map[String, EntityDecl]

  35. def entityDecl(): Unit

    <! entity := ENTITY

  36. def entityRef(pos: Int, n: String): EntityRef

    callback method invoked by MarkupParser after parsing an entity reference.

  37. var eof: Boolean

  38. def eq(arg0: AnyRef): Boolean

    This method is used to test whether the argument (arg0) is a reference to the receiver object (this).

  39. def equals(arg0: Any): Boolean

    This method is used to compare the receiver object (this) with the argument object (arg0) for equivalence.

  40. def errorAndResult[T](msg: String, x: T): T

  41. def errorNoEnd(tag: String): Nothing

  42. var extIndex: Int

  43. def extSubset(): Unit

  44. def externalID(): ExternalID

    externalID ::= SYSTEM S syslit | PUBLIC S pubid S syslit

  45. def externalSource(systemId: String): Source

    .

  46. def finalize(): Unit

    This method is called by the garbage collector on the receiver object when garbage collection determines that there are no more references to the object.

  47. def getClass(): java.lang.Class[_ <: java.lang.Object]

    Returns a representation that corresponds to the dynamic class of the receiver object.

  48. def hashCode(): Int

    Returns a hash code value for the object.

  49. def initialize: MarkupParser with MarkupHandler

    As the current code requires you to call nextch once manually after construction, this method formalizes that suboptimal reality.

  50. var inpStack: List[Source]

    stack of inputs

  51. val input: Source

  52. def intSubset(): Unit

    "rec-xml/#ExtSubset" pe references may not occur within markup declarations

  53. def isAlpha(c: Char): Boolean

    These are 99% sure to be redundant, but refactoring on the safe side.

  54. def isAlphaDigit(c: Char): Boolean

  55. def isInstanceOf[T0]: Boolean

    This method is used to test whether the dynamic type of the receiver object is T0.

  56. def isName(s: String): Boolean

    Name ::= ( Letter | '_' ) (NameChar)*

  57. def isNameChar(ch: Char): Boolean

    NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender

  58. def isNameStart(ch: Char): Boolean

    NameStart ::= ( Letter | '_' ) where Letter means in one of the Unicode general categories { Ll, Lu, Lo, Lt, Nl }

  59. def isPubIDChar(ch: Char): Boolean

  60. def isSpace(cs: Seq[Char]): Boolean

    (#x20 | #x9 | #xD | #xA)+

  61. def isSpace(ch: Char): Boolean

    (#x20 | #x9 | #xD | #xA)

  62. def isValidIANAEncoding(ianaEncoding: Seq[Char]): Boolean

    Returns true if the encoding name is a valid IANA encoding.

  63. val isValidating: Boolean

    returns true if this markup handler is validating

  64. def log(msg: String): Unit

    This method should log the message given as argument somewhere as a side-effect.

  65. def lookahead(): BufferedIterator[Char]

    Create a lookahead reader which does not influence the input

  66. def lookupElemDecl(Label: String): ElemDecl

  67. def markupDecl(): Unit

  68. def markupDecl1(): Any

  69. def mkAttributes(name: String, pscope: NamespaceBinding): (MetaData, NamespaceBinding)

  70. def mkProcInstr(position: Int, name: String, text: String): NodeSeq

  71. def ne(arg0: AnyRef): Boolean

    o.ne(arg0) is the same as !(o.eq(arg0)).

  72. def nextch: Char

    this method assigns the next character to ch and advances in input

  73. def notationDecl(): Unit

    'N' notationDecl ::= "OTATION"

  74. def notationDecl(notat: String, extID: ExternalID): Unit

  75. def notify(): Unit

    Wakes up a single thread that is waiting on the receiver object's monitor.

  76. def notifyAll(): Unit

    Wakes up all threads that are waiting on the receiver object's monitor.

  77. def parameterEntityDecl(name: String, edef: EntityDef): Unit

  78. def parseDTD(): Unit

    parses the document type declaration and assigns it to the instance variable dtd.

  79. def parsedEntityDecl(name: String, edef: EntityDef): Unit

  80. def peReference(name: String): Unit

  81. def pop(): Unit

  82. var pos: Int

    holds the position in the source file

  83. val preserveWS: Boolean

    if true, does not remove surplus whitespace

  84. def procInstr(pos: Int, target: String, txt: String): ProcInstr

    callback method invoked by MarkupParser after parsing a processing instruction (PI).

  85. def prolog(): (Option[String], Option[String], Option[Boolean])

    <? prolog ::= xml S? // this is a bit more lenient than necessary.

  86. def pubidLiteral(): String

  87. def push(entityName: String): Unit

  88. def pushExternal(systemId: String): Unit

  89. def putChar(c: Char): StringBuilder

    append Unicode character to name buffer

  90. def replacementText(entityName: String): Source

  91. def reportSyntaxError(str: String): Unit

  92. def reportSyntaxError(pos: Int, str: String): Unit

  93. def reportValidationError(pos: Int, str: String): Unit

  94. def returning[T](x: T)(f: (T) ⇒ Unit): T

    Apply a function and return the passed value

  95. def saving[A, B](getter: A, setter: (A) ⇒ Unit)(body: ⇒ B): B

    Execute body with a variable saved and restored after execution

  96. def synchronized[T0](arg0: T0): T0

  97. def systemLiteral(): String

    system literal, terminated by either ' or ".

  98. def text(pos: Int, txt: String): Text

    callback method invoked by MarkupParser after parsing text.

  99. def textDecl(): (Option[String], Option[String])

    prolog, but without standalone

  100. var tmppos: Int

    holds temporary values of pos

  101. def toString(): String

    Returns a string representation of the object .

  102. def truncatedError(msg: String): Nothing

  103. def unparsedEntityDecl(name: String, extID: ExternalID, notat: String): Unit

  104. def unreachable: Nothing

  105. def wait(): Unit

  106. def wait(arg0: Long, arg1: Int): Unit

  107. def wait(arg0: Long): Unit

  108. def xAttributeValue(): String

  109. def xAttributeValue(endCh: Char): String

    attribute value, terminated by either ' or ".

  110. def xAttributes(pscope: NamespaceBinding): (MetaData, NamespaceBinding)

    parse attribute and create namespace scope, metadata [41] Attributes ::= { S Name Eq AttValue }

  111. def xCharData: NodeSeq

    '<! CharData ::= [CDATA[ ( {char} - {char}"]]>"{char} ) ']]>'

  112. def xCharRef: String

  113. def xCharRef(it: Iterator[Char]): String

  114. def xCharRef(ch: () ⇒ Char, nextch: () ⇒ Unit): String

    CharRef ::= "&#" '0'..'9' {'0'..'9'} ";" | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"

  115. def xComment: NodeSeq

    Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'

  116. def xEQ: Unit

    scan [S] '=' [S]

  117. def xEndTag(startName: String): Unit

    [42] '<' xmlEndTag ::= '<' '/' Name S? '>'

  118. def xEntityValue(): String

    entity value, terminated by either ' or ".

  119. def xHandleError(that: Char, msg: String): Unit

  120. def xName: String

    actually, Name ::= (Letter | '_' | ':') (NameChar)* but starting with ':' cannot happen Name ::= (Letter | '_') (NameChar)*

  121. def xProcInstr: NodeSeq

    '<?' ProcInstr ::= Name [S ({Char} - ({Char}'?>' {Char})]'?>'

  122. def xSpace: Unit

    scan [3] S ::= (#x20 | #x9 | #xD | #xA)+

  123. def xSpaceOpt: Unit

    skip optional space S?

  124. def xTag(pscope: NamespaceBinding): (String, (MetaData, NamespaceBinding))

    parse a start or empty tag.

  125. def xTakeUntil[T](handler: (Int, String) ⇒ T, positioner: () ⇒ Int, until: String): T

    Take characters from the input stream until the given String "until" is seen.

  126. def xText: String

    parse character data.

  127. def xToken(that: Seq[Char]): Unit

  128. def xToken(that: Char): Unit

  129. def xmlProcInstr(): MetaData

    <? prolog ::= xml S ... ?>