a6429f8c22
Completely removed the html5lib sanitizer. Fixed the string-handling to work in both Ruby 1.8.x and 1.9.2. There are still, inexplicably, two functional tests that fail. But the rest seems to work quite well.
199 lines
6.4 KiB
Plaintext
199 lines
6.4 KiB
Plaintext
{"tests": [
|
|
|
|
{"description":"< in attribute name",
|
|
"input":"<z/0 <",
|
|
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},
|
|
|
|
{"description":"< in attribute value",
|
|
"input":"<z x=<",
|
|
"output":["ParseError", ["StartTag", "z", {"x": "<"}]]},
|
|
|
|
{"description":"CR EOF after doctype name",
|
|
"input":"<!doctype html \r",
|
|
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
|
|
|
{"description":"CR EOF in tag name",
|
|
"input":"<z\r",
|
|
"output":["ParseError", ["StartTag", "z", {}]]},
|
|
|
|
{"description":"Zero hex numeric entity",
|
|
"input":"&#x0000",
|
|
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"Zero decimal numeric entity",
|
|
"input":"&#0000",
|
|
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"Zero-prefixed hex numeric entity",
|
|
"input":"&#x000041;",
|
|
"output":[["Character", "A"]]},
|
|
|
|
{"description":"Zero-prefixed decimal numeric entity",
|
|
"input":"&#000065;",
|
|
"output":[["Character", "A"]]},
|
|
|
|
{"description":"Empty hex numeric entities",
|
|
"input":"&#x &#X ",
|
|
"output":["ParseError", ["Character", "&#x "], "ParseError", ["Character", "&#X "]]},
|
|
|
|
{"description":"Empty decimal numeric entities",
|
|
"input":"&# &#; ",
|
|
"output":["ParseError", ["Character", "&# "], "ParseError", ["Character", "&#; "]]},
|
|
|
|
{"description":"Non-BMP numeric entity",
|
|
"input":"&#x10000;",
|
|
"output":[["Character", "\uD800\uDC00"]]},
|
|
|
|
{"description":"Maximum non-BMP numeric entity",
|
|
"input":"&#X10FFFF;",
|
|
"output":[["Character", "\uDBFF\uDFFF"]]},
|
|
|
|
{"description":"Above maximum numeric entity",
|
|
"input":"&#x110000;",
|
|
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"32-bit hex numeric entity",
|
|
"input":"&#x80000041;",
|
|
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"33-bit hex numeric entity",
|
|
"input":"&#x100000041;",
|
|
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"33-bit decimal numeric entity",
|
|
"input":"&#4294967361;",
|
|
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"65-bit hex numeric entity",
|
|
"input":"&#x10000000000000041;",
|
|
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"65-bit decimal numeric entity",
|
|
"input":"&#18446744073709551681;",
|
|
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"Surrogate code point edge cases",
|
|
"input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;",
|
|
"output":[["Character", "\uD7FF"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD\uE000"]]},
|
|
|
|
{"description":"Uppercase start tag name",
|
|
"input":"<X>",
|
|
"output":[["StartTag", "x", {}]]},
|
|
|
|
{"description":"Uppercase end tag name",
|
|
"input":"</X>",
|
|
"output":[["EndTag", "x"]]},
|
|
|
|
{"description":"Uppercase attribute name",
|
|
"input":"<x X>",
|
|
"output":[["StartTag", "x", { "x":"" }]]},
|
|
|
|
{"description":"Tag/attribute name case edge values",
|
|
"input":"<x@AZ[`az{ @AZ[`az{>",
|
|
"output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
|
|
|
|
{"description":"Duplicate different-case attributes",
|
|
"input":"<x x=1 x=2 X=3>",
|
|
"output":["ParseError", "ParseError", ["StartTag", "x", { "x":"1" }]]},
|
|
|
|
{"description":"Uppercase close tag attributes",
|
|
"input":"</x X>",
|
|
"output":["ParseError", ["EndTag", "x"]]},
|
|
|
|
{"description":"Duplicate close tag attributes",
|
|
"input":"</x x x>",
|
|
"output":["ParseError", "ParseError", ["EndTag", "x"]]},
|
|
|
|
{"description":"Permitted slash",
|
|
"input":"<br/>",
|
|
"output":[["StartTag", "br", {}]]},
|
|
|
|
{"description":"Non-permitted slash",
|
|
"input":"<xr/>",
|
|
"output":["ParseError", ["StartTag", "xr", {}]]},
|
|
|
|
{"description":"Permitted slash but in close tag",
|
|
"input":"</br/>",
|
|
"output":["ParseError", ["EndTag", "br"]]},
|
|
|
|
{"description":"Doctype public case-sensitivity (1)",
|
|
"input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
|
|
"output":[["DOCTYPE", "HtMl", "AbC", "XyZ", true]]},
|
|
|
|
{"description":"Doctype public case-sensitivity (2)",
|
|
"input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
|
|
"output":[["DOCTYPE", "hTmL", "aBc", "xYz", true]]},
|
|
|
|
{"description":"Doctype system case-sensitivity (1)",
|
|
"input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
|
|
"output":[["DOCTYPE", "HtMl", null, "XyZ", true]]},
|
|
|
|
{"description":"Doctype system case-sensitivity (2)",
|
|
"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
|
|
"output":[["DOCTYPE", "hTmL", null, "xYz", true]]},
|
|
|
|
{"description":"U+0000 in lookahead region after non-matching character",
|
|
"input":"<!doc>\u0000",
|
|
"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\uFFFD"]],
|
|
"ignoreErrorOrder":true},
|
|
|
|
{"description":"U+0000 in lookahead region",
|
|
"input":"<!doc\u0000",
|
|
"output":["ParseError", "ParseError", ["Comment", "doc\uFFFD"]],
|
|
"ignoreErrorOrder":true},
|
|
|
|
{"description":"CR followed by U+0000",
|
|
"input":"\r\u0000",
|
|
"output":["ParseError", ["Character", "\n\uFFFD"]],
|
|
"ignoreErrorOrder":true},
|
|
|
|
{"description":"CR followed by non-LF",
|
|
"input":"\r?",
|
|
"output":[["Character", "\n?"]]},
|
|
|
|
{"description":"CR at EOF",
|
|
"input":"\r",
|
|
"output":[["Character", "\n"]]},
|
|
|
|
{"description":"LF at EOF",
|
|
"input":"\n",
|
|
"output":[["Character", "\n"]]},
|
|
|
|
{"description":"CR LF",
|
|
"input":"\r\n",
|
|
"output":[["Character", "\n"]]},
|
|
|
|
{"description":"CR CR",
|
|
"input":"\r\r",
|
|
"output":[["Character", "\n\n"]]},
|
|
|
|
{"description":"LF LF",
|
|
"input":"\n\n",
|
|
"output":[["Character", "\n\n"]]},
|
|
|
|
{"description":"LF CR",
|
|
"input":"\n\r",
|
|
"output":[["Character", "\n\n"]]},
|
|
|
|
{"description":"text CR CR CR text",
|
|
"input":"text\r\r\rtext",
|
|
"output":[["Character", "text\n\n\ntext"]]},
|
|
|
|
{"description":"Doctype publik",
|
|
"input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
|
|
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
|
|
|
{"description":"Doctype publi",
|
|
"input":"<!DOCTYPE html PUBLI",
|
|
"output":["ParseError", "ParseError", ["DOCTYPE", "html", null, null, false]]},
|
|
|
|
{"description":"Doctype sistem",
|
|
"input":"<!DOCTYPE html SISTEM \"AbC\">",
|
|
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
|
|
|
{"description":"Doctype sys",
|
|
"input":"<!DOCTYPE html SYS",
|
|
"output":["ParseError", "ParseError", ["DOCTYPE", "html", null, null, false]]}
|
|
|
|
]}
|