{"tests": [ {"description":"< in attribute name", "input":"<z/0 <", "output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]}, {"description":"< in attribute value", "input":"<z x=<", "output":["ParseError", ["StartTag", "z", {"x": "<"}]]}, {"description":"CR EOF after doctype name", "input":"<!doctype html \r", "output":["ParseError", ["DOCTYPE", "html", null, null, false]]}, {"description":"CR EOF in tag name", "input":"<z\r", "output":["ParseError", ["StartTag", "z", {}]]}, {"description":"Zero hex numeric entity", "input":"&#x0", "output":["ParseError", "ParseError", ["Character", "\uFFFD"]]}, {"description":"Zero decimal numeric entity", "input":"&#0", "output":["ParseError", "ParseError", ["Character", "\uFFFD"]]}, {"description":"Zero-prefixed hex numeric entity", "input":"&#x000000000000000000000041;", "output":[["Character", "A"]]}, {"description":"Zero-prefixed decimal numeric entity", "input":"&#000000000000000000000065;", "output":[["Character", "A"]]}, {"description":"Empty hex numeric entities", "input":"&#x &#X ", "output":["ParseError", ["Character", "&#x "], "ParseError", ["Character", "&#X "]]}, {"description":"Empty decimal numeric entities", "input":"&# &#; ", "output":["ParseError", ["Character", "&# "], "ParseError", ["Character", "&#; "]]}, {"description":"Non-BMP numeric entity", "input":"&#x10000;", "output":[["Character", "\uD800\uDC00"]]}, {"description":"Maximum non-BMP numeric entity", "input":"&#X10FFFF;", "output":[["Character", "\uDBFF\uDFFF"]]}, {"description":"Above maximum numeric entity", "input":"&#x110000;", "output":["ParseError", ["Character", "\uFFFD"]]}, {"description":"32-bit hex numeric entity", "input":"&#x80000041;", "output":["ParseError", ["Character", "\uFFFD"]]}, {"description":"33-bit hex numeric entity", "input":"&#x100000041;", "output":["ParseError", ["Character", "\uFFFD"]]}, {"description":"33-bit decimal numeric entity", "input":"&#4294967361;", "output":["ParseError", ["Character", "\uFFFD"]]}, {"description":"65-bit hex numeric entity", "input":"&#x10000000000000041;", "output":["ParseError", ["Character", "\uFFFD"]]}, 
{"description":"65-bit decimal numeric entity", "input":"&#18446744073709551681;", "output":["ParseError", ["Character", "\uFFFD"]]}, {"description":"Surrogate code point edge cases", "input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;", "output":[["Character", "\uD7FF"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD\uE000"]]}, {"description":"Uppercase start tag name", "input":"<X>", "output":[["StartTag", "x", {}]]}, {"description":"Uppercase end tag name", "input":"</X>", "output":[["EndTag", "x"]]}, {"description":"Uppercase attribute name", "input":"<x X>", "output":[["StartTag", "x", { "x":"" }]]}, {"description":"Tag/attribute name case edge values", "input":"<x@AZ[`az{ @AZ[`az{>", "output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]}, {"description":"Duplicate different-case attributes", "input":"<x x=1 x=2 X=3>", "output":["ParseError", "ParseError", ["StartTag", "x", { "x":"1" }]]}, {"description":"Uppercase close tag attributes", "input":"</x X>", "output":["ParseError", ["EndTag", "x"]]}, {"description":"Duplicate close tag attributes", "input":"</x x x>", "output":["ParseError", "ParseError", ["EndTag", "x"]]}, {"description":"Permitted slash", "input":"<br/>", "output":[["StartTag", "br", {}]]}, {"description":"Non-permitted slash", "input":"<xr/>", "output":["ParseError", ["StartTag", "xr", {}]]}, {"description":"Permitted slash but in close tag", "input":"</br/>", "output":["ParseError", ["EndTag", "br"]]}, {"description":"Doctype public case-sensitivity (1)", "input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">", "output":[["DOCTYPE", "HtMl", "AbC", "XyZ", true]]}, {"description":"Doctype public case-sensitivity (2)", "input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">", "output":[["DOCTYPE", "hTmL", "aBc", "xYz", true]]}, {"description":"Doctype system case-sensitivity (1)", "input":"<!DoCtYpE HtMl SyStEm \"XyZ\">", "output":[["DOCTYPE", "HtMl", null, "XyZ", true]]},
{"description":"Doctype system case-sensitivity (2)", "input":"<!dOcTyPe hTmL sYsTeM \"xYz\">", "output":[["DOCTYPE", "hTmL", null, "xYz", true]]}, {"description":"U+0000 in lookahead region after non-matching character", "input":"<!doc>\u0000", "output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\uFFFD"]], "ignoreErrorOrder":true}, {"description":"U+0000 in lookahead region", "input":"<!doc\u0000", "output":["ParseError", "ParseError", ["Comment", "doc\uFFFD"]], "ignoreErrorOrder":true}, {"description":"CR followed by U+0000", "input":"\r\u0000", "output":["ParseError", ["Character", "\n\uFFFD"]], "ignoreErrorOrder":true}, {"description":"CR followed by non-LF", "input":"\r?", "output":[["Character", "\n?"]]}, {"description":"CR at EOF", "input":"\r", "output":[["Character", "\n"]]}, {"description":"LF at EOF", "input":"\n", "output":[["Character", "\n"]]}, {"description":"CR LF", "input":"\r\n", "output":[["Character", "\n"]]}, {"description":"CR CR", "input":"\r\r", "output":[["Character", "\n\n"]]}, {"description":"LF LF", "input":"\n\n", "output":[["Character", "\n\n"]]}, {"description":"LF CR", "input":"\n\r", "output":[["Character", "\n\n"]]}, {"description":"text CR CR CR text", "input":"text\r\r\rtext", "output":[["Character", "text\n\n\ntext"]]}, {"description":"Doctype publik", "input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">", "output":["ParseError", ["DOCTYPE", "html", null, null, false]]}, {"description":"Doctype publi", "input":"<!DOCTYPE html PUBLI", "output":["ParseError", "ParseError", ["DOCTYPE", "html", null, null, false]]}, {"description":"Doctype sistem", "input":"<!DOCTYPE html SISTEM \"AbC\">", "output":["ParseError", ["DOCTYPE", "html", null, null, false]]}, {"description":"Doctype sys", "input":"<!DOCTYPE html SYS", "output":["ParseError", "ParseError", ["DOCTYPE", "html", null, null, false]]} ]}