scala.xml.parsing

ConstructingParser

class ConstructingParser extends ConstructingHandler with ExternalSources with MarkupParser

An XML parser. Parses XML and invokes callback methods of a MarkupHandler. Don't forget to call nextch on a freshly instantiated parser in order to initialize it. If you get the parser from a method of the companion object (for example fromSource, as in the example below), initialization is already done for you.

/** Example program: reads an XML document from the URL given as the first
  * command-line argument, parses it with ConstructingParser and prints the
  * pretty-printed root element to the console.
  *
  * Usage: parseFromURL <url>
  */
object parseFromURL {
  def main(args: Array[String]): Unit = {
    if (args.isEmpty) {
      // Report the missing argument instead of failing with an ArrayIndexOutOfBoundsException.
      Console.err.println("usage: parseFromURL <url>")
    } else {
      val src = scala.io.Source.fromURL(args(0))
      try {
        // fromSource initializes the parser automatically; `false` is the preserveWS flag
        val cpa = scala.xml.parsing.ConstructingParser.fromSource(src, false)
        val doc = cpa.document()
        Console.println("finished parsing")
        // let's see what it is
        val ppr = new scala.xml.PrettyPrinter(80, 5)
        Console.println(ppr.format(doc.docElem))
      } finally {
        // Always release the underlying stream, even when parsing fails.
        src.close()
      }
    }
  }
}

Inherits

  1. MarkupParser
  2. TokenTests
  3. ExternalSources
  4. ConstructingHandler
  5. MarkupHandler
  6. Logged
  7. AnyRef
  8. Any

Value Members

  1. def appendText(pos: Int, ts: NodeBuffer, txt: String): Unit

  2. def attListDecl(name: String, attList: List[AttrDecl]): Unit

  3. def attrDecl(): Unit

    <! attlist := ATTLIST

    <! attlist := ATTLIST

    definition classes: MarkupParser
  4. var ch: Char

    holds the next character

    holds the next character

    definition classes: MarkupParser
  5. def checkPubID(s: String): Boolean

  6. def checkSysID(s: String): Boolean

  7. def comment(pos: Int, txt: String): Comment

    callback method invoked by MarkupParser after parsing comment

    callback method invoked by MarkupParser after parsing comment.

    definition classes: ConstructingHandler ⇐ MarkupHandler
  8. def content(pscope: NamespaceBinding): NodeSeq

    content1 ::= '<' content1 | '&' charref

    content1 ::= '<' content1 | '&' charref ...

    definition classes: MarkupParser
  9. def content1(pscope: NamespaceBinding, ts: NodeBuffer): Unit

    '<' content1 ::=

    '<' content1 ::= ...

    definition classes: MarkupParser
  10. var curInput: Source

  11. var decls: List[Decl]

  12. def document(): Document

    [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') [25] Eq ::= S? '=' S? [26] VersionNum ::= '1.0' [27] Misc ::= Comment | PI | S

    [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') [25] Eq ::= S? '=' S? [26] VersionNum ::= '1.0' [27] Misc ::= Comment | PI | S

    definition classes: MarkupParser
  13. var dtd: DTD

  14. def elem(pos: Int, pre: String, label: String, attrs: MetaData, pscope: NamespaceBinding, nodes: NodeSeq): NodeSeq

    callback method invoked by MarkupParser after parsing an element, between the elemStart and elemEnd callbacks

    callback method invoked by MarkupParser after parsing an element, between the elemStart and elemEnd callbacks

    pos

    the position in the source file

    pre

    the prefix

    label

    the local name

    attrs

    the attributes (metadata)

    returns

    ...

    definition classes: ConstructingHandler ⇐ MarkupHandler
  15. def elemDecl(n: String, cmstr: String): Unit

  16. def elemEnd(pos: Int, pre: String, label: String): Unit

    callback method invoked by MarkupParser after end-tag of element

    callback method invoked by MarkupParser after end-tag of element.

    pos

    the position in the source file

    pre

    the prefix

    label

    the local name

    definition classes: MarkupHandler
  17. def elemStart(pos: Int, pre: String, label: String, attrs: MetaData, scope: NamespaceBinding): Unit

    callback method invoked by MarkupParser after start-tag of element

    callback method invoked by MarkupParser after start-tag of element.

    pos

    the position in the source file

    pre

    the prefix

    label

    the local name

    attrs

    the attributes (metadata)

    definition classes: MarkupHandler
  18. def element(pscope: NamespaceBinding): NodeSeq

  19. def element1(pscope: NamespaceBinding): NodeSeq

    '<' element ::= xmlTag1 '>' { xmlExpr | '{' simpleExpr '}' } ETag | xmlTag1 '/' '>'

    '<' element ::= xmlTag1 '>' { xmlExpr | '{' simpleExpr '}' } ETag | xmlTag1 '/' '>'

    definition classes: MarkupParser
  20. def elementDecl(): Unit

    <! element := ELEMENT

    <! element := ELEMENT

    definition classes: MarkupParser
  21. def endDTD(n: String): Unit

  22. var ent: Map[String, EntityDecl]

  23. def entityDecl(): Unit

    <! entity := ENTITY

    <! entity := ENTITY

    definition classes: MarkupParser
  24. def entityRef(pos: Int, n: String): EntityRef

    callback method invoked by MarkupParser after parsing entity ref

    callback method invoked by MarkupParser after parsing entity ref.

    definition classes: ConstructingHandler ⇐ MarkupHandler
  25. var eof: Boolean

  26. def equals(arg0: Any): Boolean

    This method is used to compare the receiver object (this) with the argument object (arg0) for equivalence

    This method is used to compare the receiver object (this) with the argument object (arg0) for equivalence.

    The default implementation of this method is an equivalence relation:

    • It is reflexive: for any instance x of type Any, x.equals(x) should return true.
    • It is symmetric: for any instances x and y of type Any, x.equals(y) should return true if and only if y.equals(x) returns true.
    • It is transitive: for any instances x, y, and z of type AnyRef if x.equals(y) returns true and y.equals(z) returns true, then x.equals(z) should return true.

    If you override this method, you should verify that your implementation remains an equivalence relation. Additionally, when overriding this method it is often necessary to override hashCode to ensure that objects that are "equal" (o1.equals(o2) returns true) hash to the same Int (o1.hashCode.equals(o2.hashCode)).

    arg0

    the object to compare against this object for equality.

    returns

    true if the receiver object is equivalent to the argument; false otherwise.

    definition classes: AnyRef ⇐ Any
  27. var extIndex: Int

  28. def extSubset(): Unit

  29. def externalID(): ExternalID

    externalID ::= SYSTEM S syslit | PUBLIC S pubid S syslit

    externalID ::= SYSTEM S syslit | PUBLIC S pubid S syslit

    definition classes: MarkupParser
  30. def externalSource(systemId: String): Source

    ...

    systemId

    ...

    returns

    ...

    definition classes: ExternalSources
  31. def hashCode(): Int

    Returns a hash code value for the object

    Returns a hash code value for the object.

    The default hashing algorithm is platform dependent.

    Note that it is allowed for two objects to have identical hash codes (o1.hashCode.equals(o2.hashCode)) yet not be equal (o1.equals(o2) returns false). A degenerate implementation could always return 0. However, it is required that if two objects are equal (o1.equals(o2) returns true) that they have identical hash codes (o1.hashCode.equals(o2.hashCode)). Therefore, when overriding this method, be sure to verify that the behavior is consistent with the equals method.

    definition classes: AnyRef ⇐ Any
  32. def initialize: MarkupParser with MarkupHandler

    As the current code requires you to call nextch once manually after construction, this method formalizes that suboptimal reality

    As the current code requires you to call nextch once manually after construction, this method formalizes that suboptimal reality.

    definition classes: MarkupParser
  33. var inpStack: List[Source]

    stack of inputs

    stack of inputs

    definition classes: MarkupParser
  34. val input: Source

  35. def intSubset(): Unit

    "rec-xml/#ExtSubset" pe references may not occur within markup declarations

    "rec-xml/#ExtSubset" pe references may not occur within markup declarations

    definition classes: MarkupParser
  36. def isAlpha(c: Char): Boolean

    These are 99% sure to be redundant but refactoring on the safe side

    These are 99% sure to be redundant but refactoring on the safe side.

    definition classes: TokenTests
  37. def isAlphaDigit(c: Char): Boolean

  38. def isName(s: String): Boolean

    Name ::= ( Letter | '_' ) (NameChar)*

    Name ::= ( Letter | '_' ) (NameChar)*

    see [5] of XML 1.0 specification

    definition classes: TokenTests
  39. def isNameChar(ch: Char): Boolean

    NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender

    NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender

    see [4] and Appendix B of XML 1.0 specification

    definition classes: TokenTests
  40. def isNameStart(ch: Char): Boolean

    NameStart ::= ( Letter | '_' ) where Letter means in one of the Unicode general categories { Ll, Lu, Lo, Lt, Nl }

    NameStart ::= ( Letter | '_' ) where Letter means in one of the Unicode general categories { Ll, Lu, Lo, Lt, Nl }

    We do not allow a name to start with ':'. see [3] and Appendix B of XML 1.0 specification

    definition classes: TokenTests
  41. def isPubIDChar(ch: Char): Boolean

  42. def isValidIANAEncoding(ianaEncoding: Seq[Char]): Boolean

    Returns true if the encoding name is a valid IANA encoding

    Returns true if the encoding name is a valid IANA encoding. This method does not verify that there is a decoder available for this encoding, only that the characters are valid for an IANA encoding name.

    ianaEncoding

    The IANA encoding name.

    definition classes: TokenTests
  43. val isValidating: Boolean

    returns true if this markup handler is validating

    returns true if this markup handler is validating

    definition classes: MarkupHandler
  44. def log(msg: String): Unit

    This method should log the message given as argument somewhere as a side-effect

    This method should log the message given as argument somewhere as a side-effect.

    msg

    ...

  45. def lookupElemDecl(Label: String): ElemDecl

  46. def markupDecl(): Unit

  47. def markupDecl1(): Any

  48. def nextch: Unit

    this method assigns the next character to ch and advances in the input

    this method assigns the next character to ch and advances in the input

    definition classes: MarkupParser
  49. def normalizeAttributeValue(attval: String): String

    for the moment, replace only character references see spec 3.3.3 precond: cbuf empty

    for the moment, replace only character references see spec 3.3.3 precond: cbuf empty

    definition classes: MarkupParser
  50. def notationDecl(): Unit

    'N' notationDecl ::= "OTATION"

    'N' notationDecl ::= "OTATION"

    definition classes: MarkupParser
  51. def notationDecl(notat: String, extID: ExternalID): Unit

  52. def parameterEntityDecl(name: String, edef: EntityDef): Unit

  53. def parseDTD(): Unit

    parses document type declaration and assigns it to instance variable dtd

    parses document type declaration and assigns it to instance variable dtd.

    <! parseDTD ::= DOCTYPE name ... >

    definition classes: MarkupParser
  54. def parsedEntityDecl(name: String, edef: EntityDef): Unit

  55. def peReference(name: String): Unit

  56. def pop(): Unit

  57. var pos: Int

    holds the position in the source file

    holds the position in the source file

    definition classes: MarkupParser
  58. val preserveWS: Boolean

    if true, does not remove surplus whitespace

    if true, does not remove surplus whitespace

  59. def procInstr(pos: Int, target: String, txt: String): ProcInstr

    callback method invoked by MarkupParser after parsing PI

    callback method invoked by MarkupParser after parsing PI.

    definition classes: ConstructingHandler ⇐ MarkupHandler
  60. def prolog(): (Option[String], Option[String], Option[Boolean])

    <? prolog ::= xml S? // this is a bit more lenient than necessary

    <? prolog ::= xml S? // this is a bit more lenient than necessary...

    definition classes: MarkupParser
  61. def pubidLiteral(): String

  62. def push(entityName: String): Unit

  63. def pushExternal(systemId: String): Unit

  64. def replacementText(entityName: String): Source

  65. def reportSyntaxError(str: String): Unit

  66. def reportSyntaxError(pos: Int, str: String): Unit

    PublicID (without system, only used in NOTATION)

    PublicID (without system, only used in NOTATION)

    definition classes: MarkupParser
  67. def reportValidationError(pos: Int, str: String): Unit

  68. def systemLiteral(): String

    attribute value, terminated by either ' or "

    attribute value, terminated by either ' or ". value may not contain <. AttValue ::= ' { _ } ' | " { _ } "

    definition classes: MarkupParser
  69. def text(pos: Int, txt: String): Text

    callback method invoked by MarkupParser after parsing text

    callback method invoked by MarkupParser after parsing text.

    definition classes: ConstructingHandler ⇐ MarkupHandler
  70. def textDecl(): (Option[String], Option[String])

    prolog, but without standalone

    prolog, but without standalone

    definition classes: MarkupParser
  71. var tmppos: Int

    holds temporary values of pos

    holds temporary values of pos

    definition classes: MarkupParser
  72. def toString(): String

    Returns a string representation of the object

    Returns a string representation of the object.

    The default representation is platform dependent.

    definition classes: AnyRef ⇐ Any
  73. def unparsedEntityDecl(name: String, extID: ExternalID, notat: String): Unit

  74. def xAttributeValue(): String

    attribute value, terminated by either ' or "

    attribute value, terminated by either ' or ". value may not contain <. AttValue ::= ' { _ } ' | " { _ } "

    definition classes: MarkupParser
  75. def xAttributes(pscope: NamespaceBinding): (MetaData, NamespaceBinding)

    parse attribute and create namespace scope, metadata [41] Attributes ::= { S Name Eq AttValue }

    parse attribute and create namespace scope, metadata [41] Attributes ::= { S Name Eq AttValue }

    definition classes: MarkupParser
  76. def xCharData: NodeSeq

    '<! CharData ::= [CDATA[ ( {char} - {char}"]]>"{char} ) ']]>'

    '<! CharData ::= [CDATA[ ( {char} - {char}"]]>"{char} ) ']]>'

    see [15]

    definition classes: MarkupParser
  77. def xCharRef(ch: () ⇒ Char, nextch: () ⇒ Unit): String

    CharRef ::= "&#" '0'..'9' {'0'..'9'} ";" | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"

    CharRef ::= "&#" '0'..'9' {'0'..'9'} ";" | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"

    see [66]

    definition classes: MarkupParser
  78. def xComment: NodeSeq

    Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'

    Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'

    see [15]

    definition classes: MarkupParser
  79. def xEQ: Unit

    scan [S] '=' [S]

    scan [S] '=' [S]

    definition classes: MarkupParser
  80. def xEndTag(n: String): Unit

    [42] '<' xmlEndTag ::= '<' '/' Name S? '>'

    [42] '<' xmlEndTag ::= '<' '/' Name S? '>'

    definition classes: MarkupParser
  81. def xEntityValue(): String

    entity value, terminated by either ' or "

    entity value, terminated by either ' or ". value may not contain <. AttValue ::= ' { _ } ' | " { _ } "

    definition classes: MarkupParser
  82. def xName: String

    Name ::= (Letter | '_' | ':') (NameChar)*

    Name ::= (Letter | '_' | ':') (NameChar)*

    see [5] of XML 1.0 specification

    definition classes: MarkupParser
  83. def xProcInstr: NodeSeq

    '<?' ProcInstr ::= Name [S ({Char} - ({Char}'?>' {Char}))]'?>'

    '<?' ProcInstr ::= Name [S ({Char} - ({Char}'?>' {Char}))]'?>'

    see [15]

    definition classes: MarkupParser
  84. def xSpace: Unit

    scan [3] S ::= (#x20 | #x9 | #xD | #xA)+

    scan [3] S ::= (#x20 | #x9 | #xD | #xA)+

    definition classes: MarkupParser
  85. def xSpaceOpt: Unit

    skip optional space S?

    skip optional space S?

    definition classes: MarkupParser
  86. def xText: String

    parse character data

    parse character data. precondition: xEmbeddedBlock == false (we are not in a scala block)

    definition classes: MarkupParser
  87. def xToken(that: Seq[Char]): Unit

  88. def xToken(that: Char): Unit

    munch expected XML token, report syntax error for unexpected

    munch expected XML token, report syntax error for unexpected

    definition classes: MarkupParser
  89. def xmlProcInstr(): MetaData

    <? prolog ::= xml S ... ?>

    <? prolog ::= xml S ... ?>

    definition classes: MarkupParser

Instance constructors

  1. new ConstructingParser(input: Source, preserveWS: Boolean)

  2. new ConstructingParser()