Minimal DOMBuilder example. Implements the absolute minimum required for Chawan's HTML parser to work correctly.
For an example of a complete implementation, see Chawan's chadombuilder.
WARNING: this assumes that the input is encoded as valid UTF-8; text tokens containing invalid UTF-8 are silently discarded.
For a variant that can switch encodings (for example, when meta tags are encountered), see chame/minidom_cs.
Types
CharacterData = ref object of Node
  data*: string
Comment = ref object of CharacterData
Document = ref object of Node
  factory*: MAtomFactory
DocumentFragment = ref object of Node
DocumentType = ref object of Node
  name*: string
  publicId*: string
  systemId*: string
Element = ref object of Node
  localName*: MAtom
  namespace*: Namespace
  attrs*: seq[Attribute]
  document*: Document
HTMLTemplateElement = ref object of Element
  content*: DocumentFragment
MAtom = distinct int
MAtomFactory = ref object of RootObj
MiniDOMBuilder = ref object of DOMBuilder[Node, MAtom]
  document*: Document
  factory*: MAtomFactory
Text = ref object of CharacterData
Procs
func atomToStr(factory: MAtomFactory; atom: MAtom): string {.raises: [], tags: [], forbids: [].}
proc localNameStr(element: Element): string {.raises: [], tags: [], forbids: [].}
proc newMAtomFactory(): MAtomFactory {.raises: [Exception], tags: [RootEffect], forbids: [].}
proc newMiniDOMBuilder(factory: MAtomFactory): MiniDOMBuilder {.raises: [], tags: [], forbids: [].}
proc parseHTML(inputStream: Stream; opts = HTML5ParserOpts[Node, MAtom](); factory = newMAtomFactory()): Document {.raises: [IOError, OSError, Exception], tags: [ReadIOEffect, RootEffect], forbids: [].}
-
Read, parse and return an HTML document from inputStream, using parser options opts and MAtom factory factory.
inputStream is not required to be seekable.
For a description of HTML5ParserOpts, see the htmlparser module's documentation.
proc parseHTMLFragment(inputStream: Stream; element: Element; opts: HTML5ParserOpts[Node, MAtom]; factory = newMAtomFactory()): seq[Node] {.raises: [IOError, OSError, Exception], tags: [ReadIOEffect, RootEffect], forbids: [].}
-
Read, parse and return the children of an HTML fragment from inputStream, using context element element and parser options opts.
For information on opts (an HTML5ParserOpts object), please consult the documentation of chame/htmlparser.nim.
For details on the HTML fragment parsing algorithm, see https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
Note: the members ctx, initialTokenizerState, openElementsInit and pushInTemplate of opts are overridden (in accordance with the standard).
proc parseHTMLFragment(s: string; element: Element): seq[Node] {.raises: [IOError, OSError, Exception], tags: [ReadIOEffect, RootEffect], forbids: [].}
-
Convenience wrapper around the parseHTMLFragment overload that takes an input stream and opts.
Read, parse and return the children of an HTML fragment from the string s, using context element element.
For details on the HTML fragment parsing algorithm, see https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
func preInsertionValidity(parent, node: Node; before: Node): bool {.raises: [], tags: [], forbids: [].}
func strToAtom(factory: MAtomFactory; s: string): MAtom {.raises: [], tags: [], forbids: [].}
func tagTypeToAtom(factory: MAtomFactory; tagType: TagType): MAtom {.raises: [], tags: [], forbids: [].}
Exports
-
TAG_RB, TAG_TYP, TAG_OBJECT, TAG_DFN, TAG_SUMMARY, TAG_DEFINITION_URL, TAG_HTTP_EQUIV, PREFIX_XML, TAG_PLAINTEXT, NamespacePrefix, PREFIX_XLINK, HTagTypes, XMLNS, TAG_EMBED, TAG_IMAGE, TAG_TH, TAG_DATALIST, TAG_COL, TAG_TABLE, TAG_INS, TAG_BODY, TAG_PRE, TAG_FRAMESET, TAG_B, TAG_DD, TAG_FONT, TAG_RT, TAG_FORM, TAG_BDO, TAG_OL, TAG_TIME, TAG_ABBR, TAG_LINK, TAG_MI, TAG_SPAN, TAG_HEADER, TAG_NOEMBED, TAG_LI, TAG_NOSCRIPT, TAG_DATA, TAG_KEYGEN, TAG_MALIGNMARK, TAG_IMG, TAG_BLINK, TAG_UNKNOWN, TAG_MGLYPH, TAG_OPTGROUP, TAG_SECTION, TAG_FIGURE, TAG_MARQUEE, TAG_MAP, TAG_A, TAG_DETAILS, QuirksMode, TAG_LABEL, TAG_DESC, TAG_DEL, TAG_MO, HTML, TAG_HTML, TAG_WBR, TAG_FRAME, TAG_CITE, TAG_SELECT, TAG_VAR, TAG_AREA, TAG_DIV, TAG_SUP, FormAssociatedElements, TAG_SVG, TAG_BR, TAG_DIR, TAG_OPTION, TAG_TFOOT, TAG_H5, TAG_SEARCH, TAG_KBD, Namespace, TAG_ANNOTATION_XML, TAG_TRACK, AllTagTypes, TAG_RTC, TAG_Q, TAG_MARK, TAG_PICTURE, MATHML, TAG_H3, TAG_IFRAME, TAG_HEAD, TAG_EM, TAG_NOBR, TAG_HR, TAG_CHARSET, TAG_H6, TAG_BLOCKQUOTE, TAG_DL, TAG_CONTENT, TAG_OUTPUT, TAG_ADDRESS, TAG_MN, TAG_ARTICLE, TAG_P, XLINK, TAG_LEGEND, TAG_XMP, TAG_RUBY, TAG_CODE, PREFIX_UNKNOWN, TAG_SAMP, TAG_AUDIO, TAG_MATH, TAG_FIGCAPTION, TAG_I, TAG_META, TAG_PROGRESS, TAG_STYLE, PREFIX_XMLNS, TAG_FOOTER, TAG_MS, TAG_U, TAG_H4, TAG_BUTTON, TAG_TEXTAREA, TAG_DIALOG, TAG_ENCODING, TAG_COLOR, ListedElements, TAG_PORTAL, TAG_SOURCE, TAG_TT, TAG_CAPTION, TAG_STRONG, TAG_ASIDE, TAG_CANVAS, SVG, TAG_H2, TAG_NOFRAMES, TAG_TEMPLATE, TAG_LISTING, TAG_TITLE, TAG_BASE, TAG_BGSOUND, TagType, TAG_MENU, TAG_FACE, TAG_BASEFONT, TAG_CENTER, TAG_TR, TAG_METER, TAG_VIDEO, TAG_SIZE, TAG_S, TAG_BIG, TAG_SARCASM, TAG_DT, TAG_RP, TAG_NAV, TAG_H1, TAG_TBODY, TAG_MAIN, TAG_THEAD, TAG_FIELDSET, TAG_SUB, TAG_COLGROUP, XML, TAG_SCRIPT, TAG_TD, TAG_STRIKE, TAG_SMALL, TAG_APPLET, TAG_INPUT, TAG_BDI, TAG_FOREIGN_OBJECT, TAG_UL, NO_PREFIX, TAG_MTEXT, NAMESPACE_UNKNOWN, NO_NAMESPACE, TAG_PARAM, TAG_HGROUP