chame/htmltokenizer

Types

# A single token, generic over the `Atom` type used for tag names and
# attribute keys. It is a reference object variant: the discriminator `t`
# selects which payload fields are present on a given instance.
Token[Atom] = ref object
  flags*: set[TokenFlag] ## flags attached to this token (any variant)
  case t*: TokenType
  of DOCTYPE:
    name*: string ## DOCTYPE name
    pubid*: string ## presumably the public identifier - TODO confirm
    sysid*: string ## presumably the system identifier - TODO confirm
  of START_TAG, END_TAG:
    tagname*: Atom ## tag name, stored as an Atom rather than a string
    attrs*: Table[Atom, string] ## string values keyed by Atom
  of CHARACTER, CHARACTER_WHITESPACE, COMMENT:
    s*: string ## text payload shared by character and comment tokens
  of EOF, CHARACTER_NULL:
    # These two kinds carry no payload fields.
    nil
# Flag values stored in `Token.flags` as a `set[TokenFlag]`.
# NOTE(review): the names suggest tfQuirks/tfPubid/tfSysid relate to DOCTYPE
# tokens and tfSelfClosing to tags - confirm against the implementation.
TokenFlag = enum
  tfQuirks, tfPubid, tfSysid, tfSelfClosing
# Tokenizer state object, generic over `Handle` and `Atom`. It is a value
# `object`, and the procs that operate on it take it as a `var` parameter.
Tokenizer[Handle; Atom] = object
  laststart*: Token[Atom] ## presumably the last start tag seen - TODO confirm
  state*: TokenizerState ## current tokenizer state
  hasnonhtml*: bool ## NOTE(review): meaning not visible here - confirm
  tokqueue*: seq[Token[Atom]] ## presumably tokens awaiting the caller - confirm
  inputBufIdx*: int ## presumably an index into the input buffer - confirm
# Result code returned by `tokenize` and `finish`.
# NOTE(review): presumably trEmit signals that tokens are ready to be
# consumed and trDone that processing is complete - confirm with the
# implementation.
TokenizeResult = enum
  trDone, trEmit
# Discriminator for `Token`. Per the `Token` variant, DOCTYPE carries
# name/pubid/sysid, START_TAG and END_TAG carry tagname/attrs, CHARACTER,
# CHARACTER_WHITESPACE and COMMENT carry `s`, and CHARACTER_NULL and EOF
# carry nothing.
TokenType = enum
  DOCTYPE, START_TAG, END_TAG, COMMENT, CHARACTER, CHARACTER_WHITESPACE,
  CHARACTER_NULL, EOF

Procs

# Stringify operator for `Token`: returns a string for `tok`. Declared as a
# `func`, so it is side-effect free.
func `$`(tok: Token): string
# Takes the tokenizer by `var` (it may be mutated) and returns a
# TokenizeResult.
# NOTE(review): presumably called once no more input remains, to flush
# pending state - confirm against the implementation.
proc finish[Handle, Atom](tokenizer: var Tokenizer[Handle, Atom]): TokenizeResult
# Builds a Tokenizer[Handle, Atom] from a DOMBuilder. `initialState` defaults
# to DATA when the caller does not supply one.
# NOTE(review): presumably `initialState` seeds the tokenizer's `state`
# field - confirm.
proc newTokenizer[Handle, Atom](dombuilder: DOMBuilder[Handle, Atom];
                                initialState = DATA): Tokenizer[Handle, Atom]
# Takes the tokenizer by `var` (it may be mutated) plus an input buffer of
# chars, and returns a TokenizeResult.
# NOTE(review): presumably the caller consumes `tokqueue` when trEmit is
# returned and calls again with the same buffer until trDone - confirm.
proc tokenize[Handle, Atom](tokenizer: var Tokenizer[Handle, Atom];
                            ibuf: openArray[char]): TokenizeResult