chame/minidom

Minimal DOMBuilder example. Implements the absolute minimum required for Chawan's HTML parser to work correctly.

For an example of a complete implementation, see Chawan's chadombuilder.

WARNING: this module assumes that the input is encoded as valid UTF-8; text tokens containing invalid UTF-8 are silently discarded.

For a variant that can switch encodings (e.g. when meta tags are encountered), see chame/minidom_cs.

Types

CharacterData = ref object of Node
  data*: string
Comment = ref object of CharacterData
Document = ref object of Node
  factory*: MAtomFactory
DocumentFragment = ref object of Node
DocumentType = ref object of Node
  name*: string
  publicId*: string
  systemId*: string
Element = ref object of Node
  localName*: MAtom
  namespace*: Namespace
  attrs*: seq[Attribute]
  document*: Document
HTMLTemplateElement = ref object of Element
  content*: DocumentFragment
MAtom = distinct int
MAtomFactory = ref object of RootObj
MiniDOMBuilder = ref object of DOMBuilder[Node, MAtom]
  document*: Document
  factory*: MAtomFactory
Node = ref object of RootObj
  childList*: seq[Node]
  parentNode* {.cursor.}: Node
Text = ref object of CharacterData

Procs

func `==`(a, b: MAtom): bool {.borrow, ...raises: [], tags: [], forbids: [].}
func atomToStr(factory: MAtomFactory; atom: MAtom): string {....raises: [],
    tags: [], forbids: [].}
func cmp(a, b: MAtom): int {.inline, ...raises: [], tags: [], forbids: [].}
func hash(atom: MAtom): Hash {.borrow, ...raises: [], tags: [], forbids: [].}
proc localNameStr(element: Element): string {....raises: [], tags: [], forbids: [].}
proc newMAtomFactory(): MAtomFactory {....raises: [Exception], tags: [RootEffect],
                                       forbids: [].}
proc newMiniDOMBuilder(factory: MAtomFactory): MiniDOMBuilder {....raises: [],
    tags: [], forbids: [].}
proc parseHTML(inputStream: Stream; opts = HTML5ParserOpts[Node, MAtom]();
               factory = newMAtomFactory()): Document {.
    ...raises: [IOError, OSError, Exception], tags: [ReadIOEffect, RootEffect],
    forbids: [].}

Read, parse and return an HTML document from inputStream, using parser options opts and MAtom factory factory.

inputStream is not required to be seekable.

For a description of HTML5ParserOpts, see the htmlparser module's documentation.

proc parseHTMLFragment(inputStream: Stream; element: Element;
                       opts: HTML5ParserOpts[Node, MAtom];
                       factory = newMAtomFactory()): seq[Node] {.
    ...raises: [IOError, OSError, Exception], tags: [ReadIOEffect, RootEffect],
    forbids: [].}

Read, parse and return the children of an HTML fragment from inputStream, using context element element and parser options opts.

For information on opts (an HTML5ParserOpts object), please consult the documentation of chame/htmlparser.nim.

For details on the HTML fragment parsing algorithm, see https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments

Note: the members ctx, initialTokenizerState, openElementsInit and pushInTemplate of opts are overridden (in accordance with the standard).

proc parseHTMLFragment(s: string; element: Element): seq[Node] {.
    ...raises: [IOError, OSError, Exception], tags: [ReadIOEffect, RootEffect],
    forbids: [].}

Convenience wrapper around the parseHTMLFragment overload that takes opts.

Read, parse and return the children of an HTML fragment from the string s, using context element element.

For details on the HTML fragment parsing algorithm, see https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments

func preInsertionValidity(parent, node: Node; before: Node): bool {....raises: [],
    tags: [], forbids: [].}
func strToAtom(factory: MAtomFactory; s: string): MAtom {....raises: [], tags: [],
    forbids: [].}
func tagType(element: Element): TagType {....raises: [], tags: [], forbids: [].}
func tagTypeToAtom(factory: MAtomFactory; tagType: TagType): MAtom {....raises: [],
    tags: [], forbids: [].}
func toTagType(atom: MAtom): TagType {.inline, ...raises: [], tags: [], forbids: [].}

Iterators

iterator attrsStr(element: Element): tuple[name, value: string] {....raises: [],
    tags: [], forbids: [].}

Exports

TAG_RB, TAG_TYP, TAG_OBJECT, TAG_DFN, TAG_SUMMARY, TAG_DEFINITION_URL, TAG_HTTP_EQUIV, PREFIX_XML, TAG_PLAINTEXT, NamespacePrefix, PREFIX_XLINK, HTagTypes, XMLNS, TAG_EMBED, TAG_IMAGE, TAG_TH, TAG_DATALIST, TAG_COL, TAG_TABLE, TAG_INS, TAG_BODY, TAG_PRE, TAG_FRAMESET, TAG_B, TAG_DD, TAG_FONT, TAG_RT, TAG_FORM, TAG_BDO, TAG_OL, TAG_TIME, TAG_ABBR, TAG_LINK, TAG_MI, TAG_SPAN, TAG_HEADER, TAG_NOEMBED, TAG_LI, TAG_NOSCRIPT, TAG_DATA, TAG_KEYGEN, TAG_MALIGNMARK, TAG_IMG, TAG_BLINK, TAG_UNKNOWN, TAG_MGLYPH, TAG_OPTGROUP, TAG_SECTION, TAG_FIGURE, TAG_MARQUEE, TAG_MAP, TAG_A, TAG_DETAILS, QuirksMode, TAG_LABEL, TAG_DESC, TAG_DEL, TAG_MO, HTML, TAG_HTML, TAG_WBR, TAG_FRAME, TAG_CITE, TAG_SELECT, TAG_VAR, TAG_AREA, TAG_DIV, TAG_SUP, FormAssociatedElements, TAG_SVG, TAG_BR, TAG_DIR, TAG_OPTION, TAG_TFOOT, TAG_H5, TAG_SEARCH, TAG_KBD, Namespace, TAG_ANNOTATION_XML, TAG_TRACK, AllTagTypes, TAG_RTC, TAG_Q, TAG_MARK, TAG_PICTURE, MATHML, TAG_H3, TAG_IFRAME, TAG_HEAD, TAG_EM, TAG_NOBR, TAG_HR, TAG_CHARSET, TAG_H6, TAG_BLOCKQUOTE, TAG_DL, TAG_CONTENT, TAG_OUTPUT, TAG_ADDRESS, TAG_MN, TAG_ARTICLE, TAG_P, XLINK, TAG_LEGEND, TAG_XMP, TAG_RUBY, TAG_CODE, PREFIX_UNKNOWN, TAG_SAMP, TAG_AUDIO, TAG_MATH, TAG_FIGCAPTION, TAG_I, TAG_META, TAG_PROGRESS, TAG_STYLE, PREFIX_XMLNS, TAG_FOOTER, TAG_MS, TAG_U, TAG_H4, TAG_BUTTON, TAG_TEXTAREA, TAG_DIALOG, TAG_ENCODING, TAG_COLOR, ListedElements, TAG_PORTAL, TAG_SOURCE, TAG_TT, TAG_CAPTION, TAG_STRONG, TAG_ASIDE, TAG_CANVAS, SVG, TAG_H2, TAG_NOFRAMES, TAG_TEMPLATE, TAG_LISTING, TAG_TITLE, TAG_BASE, TAG_BGSOUND, TagType, TAG_MENU, TAG_FACE, TAG_BASEFONT, TAG_CENTER, TAG_TR, TAG_METER, TAG_VIDEO, TAG_SIZE, TAG_S, TAG_BIG, TAG_SARCASM, TAG_DT, TAG_RP, TAG_NAV, TAG_H1, TAG_TBODY, TAG_MAIN, TAG_THEAD, TAG_FIELDSET, TAG_SUB, TAG_COLGROUP, XML, TAG_SCRIPT, TAG_TD, TAG_STRIKE, TAG_SMALL, TAG_APPLET, TAG_INPUT, TAG_BDI, TAG_FOREIGN_OBJECT, TAG_UL, NO_PREFIX, TAG_MTEXT, NAMESPACE_UNKNOWN, NO_NAMESPACE, TAG_PARAM, TAG_HGROUP