Minor S5 tweaks and Sync with Latest HTML5lib

This commit is contained in:
Jacques Distler 2007-08-30 12:19:10 -05:00
parent dbed460843
commit 81d3cdc8e4
81 changed files with 9887 additions and 1687 deletions

View file

@ -11,12 +11,24 @@
"input":"foo</bar>",
"output":[["Character", "foo"], ["EndTag", "bar"]]},
{"description":"End tag closing RCDATA or CDATA (case-insensitivity)",
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"bar",
"input":"foo</bAr>",
"output":[["Character", "foo"], ["EndTag", "bar"]]},
{"description":"End tag with incorrect name in RCDATA or CDATA",
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"baz",
"input":"</foo>bar</baz>",
"output":[["Character", "</foo>bar"], ["EndTag", "baz"]]},
{"description":"End tag with incorrect name in RCDATA or CDATA (starting like correct name)",
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"baz",
"input":"</foo>bar</bazaar>",
"output":[["Character", "</foo>bar</bazaar>"]]},
{"description":"End tag closing RCDATA or CDATA, switching back to PCDATA",
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"bar",

File diff suppressed because it is too large Load diff

View file

@ -161,6 +161,10 @@
"input":"<h a='&not1'>",
"output":["ParseError", ["StartTag", "h", {"a":"&not1"}]]},
{"description":"Entity in attribute without semicolon ending in i",
"input":"<h a='&noti'>",
"output":["ParseError", ["StartTag", "h", {"a":"&noti"}]]},
{"description":"Entity in attribute without semicolon",
"input":"<h a='&COPY'>",
"output":["ParseError", ["StartTag", "h", {"a":"©"}]]}

View file

@ -60,14 +60,6 @@
"input":"&#xD869;&#xDED6;",
"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},
{"description":"Numeric entity representing a Windows-1252 'codepoint'",
"input":"&#137;",
"output":["ParseError", ["Character", "\u2030"]]},
{"description":"Hexadecimal entity representing a Windows-1252 'codepoint'",
"input":"&#x89;",
"output":["ParseError", ["Character", "\u2030"]]},
{"description":"Hexadecimal entity with mixed uppercase and lowercase",
"input":"&#xaBcD;",
"output":[["Character", "\uABCD"]]},
@ -122,7 +114,15 @@
{"description":"Null Byte Replacement",
"input":"\u0000",
"output":["ParseError", ["Character", "\ufffd"]]}
"output":["ParseError", ["Character", "\ufffd"]]},
{"description":"Comment with dash",
"input":"<!---x",
"output":["ParseError", ["Comment", "-x"]]},
{"description":"Entity + newline",
"input":"\nx\n&gt;\n",
"output":[["Character","\nx\n>\n"]]}
]}

View file

@ -0,0 +1,367 @@
{"tests": [
{"description":"<",
"input":"<",
"output":["ParseError", ["Character", "<"]]},
{"description":"<>",
"input":"<>",
"output":["ParseError", ["Character", "<>"]]},
{"description":"<!",
"input":"<!",
"output":["ParseError", ["Comment", ""]]},
{"description":"<!>",
"input":"<!>",
"output":["ParseError", ["Comment", ""]]},
{"description":"<!--",
"input":"<!--",
"output":["ParseError", ["Comment", ""]]},
{"description":"<!-->",
"input":"<!-->",
"output":["ParseError", ["Comment", ""]]},
{"description":"<!---",
"input":"<!---",
"output":["ParseError", ["Comment", ""]]},
{"description":"<!--->",
"input":"<!--->",
"output":["ParseError", ["Comment", ""]]},
{"description":"<!---->",
"input":"<!---->",
"output":[["Comment", ""]]},
{"description":"<!-----",
"input":"<!-----",
"output":["ParseError", "ParseError", ["Comment", "-"]]},
{"description":"<!----.",
"input":"<!----.",
"output":["ParseError", "ParseError", ["Comment", "--."]]},
{"description":"<!---?",
"input":"<!---?",
"output":["ParseError", ["Comment", "-?"]]},
{"description":"<!--?-",
"input":"<!--?-",
"output":["ParseError", ["Comment", "?"]]},
{"description":"<!--?--",
"input":"<!--?--",
"output":["ParseError", ["Comment", "?"]]},
{"description":"<!--?-.",
"input":"<!--?-.",
"output":["ParseError", ["Comment", "?-."]]},
{"description":"<!--?.",
"input":"<!--?.",
"output":["ParseError", ["Comment", "?."]]},
{"description":"<?>",
"input":"<?>",
"output":["ParseError", ["Comment", "?"]]},
{"description":"<??",
"input":"<??",
"output":["ParseError", ["Comment", "??"]]},
{"description":"</",
"input":"</",
"output":["ParseError", ["Character", "</"]]},
{"description":"</>",
"input":"</>",
"output":["ParseError"]},
{"description":"</?",
"input":"</?",
"output":["ParseError", ["Comment", "?"]]},
{"description":">",
"input":">",
"output":[["Character", ">"]]},
{"description":"-",
"input":"-",
"output":[["Character", "-"]]},
{"description":"?",
"input":"?",
"output":[["Character", "?"]]},
{"description":"&",
"input":"&",
"output":[["Character", "&"]]},
{"description":"&#",
"input":"&#",
"output":["ParseError", ["Character", "&#"]]},
{"description":"&#9",
"input":"&#9",
"output":["ParseError", ["Character", "\t"]]},
{"description":"<!doctype >",
"input":"<!doctype >",
"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
{"description":"<!doctype ",
"input":"<!doctype ",
"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
{"description":"<!doctype!>",
"input":"<!doctype!>",
"output":["ParseError", ["DOCTYPE", "!", null, null, true]]},
{"description":"<!doctype! >",
"input":"<!doctype! >",
"output":["ParseError", ["DOCTYPE", "!", null, null, true]]},
{"description":"<!doctype! ",
"input":"<!doctype! ",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! ?>",
"input":"<!doctype! ?>",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! ??",
"input":"<!doctype! ??",
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype!?",
"input":"<!doctype!?",
"output":["ParseError", "ParseError", ["DOCTYPE", "!?", null, null, false]]},
{"description":"<!doctype! public>",
"input":"<!doctype! public>",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! public ",
"input":"<!doctype! public ",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! public?",
"input":"<!doctype! public?",
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! public''",
"input":"<!doctype! public''",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
{"description":"<!doctype! public'(",
"input":"<!doctype! public'(",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "(", null, false]]},
{"description":"<!doctype! public\"\">",
"input":"<!doctype! public\"\">",
"output":["ParseError", ["DOCTYPE", "!", "", null, true]]},
{"description":"<!doctype! public\"\" ",
"input":"<!doctype! public\"\" ",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
{"description":"<!doctype! public\"\"?",
"input":"<!doctype! public\"\"?",
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", "", null, false]]},
{"description":"<!doctype! public\"\"'",
"input":"<!doctype! public\"\"'",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", "", false]]},
{"description":"<!doctype! public\"\"\"",
"input":"<!doctype! public\"\"\"",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "", "", false]]},
{"description":"<!doctype! public\"#",
"input":"<!doctype! public\"#",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", "#", null, false]]},
{"description":"<!doctype! system>",
"input":"<!doctype! system>",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! system ",
"input":"<!doctype! system ",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! system?",
"input":"<!doctype! system?",
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
{"description":"<!doctype! system''",
"input":"<!doctype! system''",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
{"description":"<!doctype! system'(",
"input":"<!doctype! system'(",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "(", false]]},
{"description":"<!doctype! system\"\">",
"input":"<!doctype! system\"\">",
"output":["ParseError", ["DOCTYPE", "!", null, "", true]]},
{"description":"<!doctype! system\"\" ",
"input":"<!doctype! system\"\" ",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
{"description":"<!doctype! system\"\"?",
"input":"<!doctype! system\"\"?",
"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "!", null, "", false]]},
{"description":"<!doctype! system\"#",
"input":"<!doctype! system\"#",
"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, "#", false]]},
{"description":"</z",
"input":"</z",
"output":["ParseError", ["EndTag", "z"]]},
{"description":"<z>",
"input":"<z>",
"output":[["StartTag", "z", {}]]},
{"description":"<z ",
"input":"<z ",
"output":["ParseError", ["StartTag", "z", {}]]},
{"description":"<z/>",
"input":"<z/>",
"output":["ParseError", ["StartTag", "z", {}]]},
{"description":"<z/ ",
"input":"<z/ ",
"output":["ParseError", "ParseError", ["StartTag", "z", {}]]},
{"description":"<z//",
"input":"<z//",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {}]]},
{"description":"<z",
"input":"<z",
"output":["ParseError", ["StartTag", "z", {}]]},
{"description":"</z",
"input":"</z",
"output":["ParseError", ["EndTag", "z"]]},
{"description":"<z0",
"input":"<z0",
"output":["ParseError", ["StartTag", "z0", {}]]},
{"description":"<z/0=>",
"input":"<z/0=>",
"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0= ",
"input":"<z/0= ",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0=?>",
"input":"<z/0=?>",
"output":["ParseError", ["StartTag", "z", {"0": "?"}]]},
{"description":"<z/0=? ",
"input":"<z/0=? ",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "?"}]]},
{"description":"<z/0=??",
"input":"<z/0=??",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "??"}]]},
{"description":"<z/0=''",
"input":"<z/0=''",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0='&",
"input":"<z/0='&",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
{"description":"<z/0='%",
"input":"<z/0='%",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "%"}]]},
{"description":"<z/0=\"'",
"input":"<z/0=\"'",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "'"}]]},
{"description":"<z/0=\"\"",
"input":"<z/0=\"\"",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0=\"&",
"input":"<z/0=\"&",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
{"description":"<z/0=&",
"input":"<z/0=&",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "&"}]]},
{"description":"<z/0>",
"input":"<z/0>",
"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0 =",
"input":"<z/0 =",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0 >",
"input":"<z/0 >",
"output":["ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0 ",
"input":"<z/0 ",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0 /",
"input":"<z/0 /",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0/",
"input":"<z/0/",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/00",
"input":"<z/00",
"output":["ParseError", "ParseError", ["StartTag", "z", {"00": ""}]]},
{"description":"<z/0 0",
"input":"<z/0 0",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": ""}]]},
{"description":"<z/0='&#9",
"input":"<z/0='&#9",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
{"description":"<z/0=\"&#9",
"input":"<z/0=\"&#9",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
{"description":"<z/0=&#9",
"input":"<z/0=&#9",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"0": "\t"}]]},
{"description":"<z/0z",
"input":"<z/0z",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0z": ""}]]},
{"description":"<z/0 z",
"input":"<z/0 z",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "z": ""}]]},
{"description":"<zz",
"input":"<zz",
"output":["ParseError", ["StartTag", "zz", {}]]},
{"description":"<z/z",
"input":"<z/z",
"output":["ParseError", "ParseError", ["StartTag", "z", {"z": ""}]]}
]}

View file

@ -0,0 +1,198 @@
{"tests": [
{"description":"< in attribute name",
"input":"<z/0 <",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},
{"description":"< in attribute value",
"input":"<z x=<",
"output":["ParseError", ["StartTag", "z", {"x": "<"}]]},
{"description":"CR EOF after doctype name",
"input":"<!doctype html \r",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"CR EOF in tag name",
"input":"<z\r",
"output":["ParseError", ["StartTag", "z", {}]]},
{"description":"Zero hex numeric entity",
"input":"&#x0",
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
{"description":"Zero decimal numeric entity",
"input":"&#0",
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
{"description":"Zero-prefixed hex numeric entity",
"input":"&#x000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041;",
"output":[["Character", "A"]]},
{"description":"Zero-prefixed decimal numeric entity",
"input":"&#000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000065;",
"output":[["Character", "A"]]},
{"description":"Empty hex numeric entities",
"input":"&#x &#X ",
"output":["ParseError", ["Character", "&#x "], "ParseError", ["Character", "&#X "]]},
{"description":"Empty decimal numeric entities",
"input":"&# &#; ",
"output":["ParseError", ["Character", "&# "], "ParseError", ["Character", "&#; "]]},
{"description":"Non-BMP numeric entity",
"input":"&#x10000;",
"output":[["Character", "\uD800\uDC00"]]},
{"description":"Maximum non-BMP numeric entity",
"input":"&#X10FFFF;",
"output":[["Character", "\uDBFF\uDFFF"]]},
{"description":"Above maximum numeric entity",
"input":"&#x110000;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"32-bit hex numeric entity",
"input":"&#x80000041;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"33-bit hex numeric entity",
"input":"&#x100000041;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"33-bit decimal numeric entity",
"input":"&#4294967361;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"65-bit hex numeric entity",
"input":"&#x10000000000000041;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"65-bit decimal numeric entity",
"input":"&#18446744073709551681;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Surrogate code point edge cases",
"input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;",
"output":[["Character", "\uD7FF"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD\uE000"]]},
{"description":"Uppercase start tag name",
"input":"<X>",
"output":[["StartTag", "x", {}]]},
{"description":"Uppercase end tag name",
"input":"</X>",
"output":[["EndTag", "x"]]},
{"description":"Uppercase attribute name",
"input":"<x X>",
"output":[["StartTag", "x", { "x":"" }]]},
{"description":"Tag/attribute name case edge values",
"input":"<x@AZ[`az{ @AZ[`az{>",
"output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
{"description":"Duplicate different-case attributes",
"input":"<x x=1 x=2 X=3>",
"output":["ParseError", "ParseError", ["StartTag", "x", { "x":"1" }]]},
{"description":"Uppercase close tag attributes",
"input":"</x X>",
"output":["ParseError", ["EndTag", "x"]]},
{"description":"Duplicate close tag attributes",
"input":"</x x x>",
"output":["ParseError", "ParseError", ["EndTag", "x"]]},
{"description":"Permitted slash",
"input":"<br/>",
"output":[["StartTag", "br", {}]]},
{"description":"Non-permitted slash",
"input":"<xr/>",
"output":["ParseError", ["StartTag", "xr", {}]]},
{"description":"Permitted slash but in close tag",
"input":"</br/>",
"output":["ParseError", ["EndTag", "br"]]},
{"description":"Doctype public case-sensitivity (1)",
"input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
"output":[["DOCTYPE", "HtMl", "AbC", "XyZ", true]]},
{"description":"Doctype public case-sensitivity (2)",
"input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
"output":[["DOCTYPE", "hTmL", "aBc", "xYz", true]]},
{"description":"Doctype system case-sensitivity (1)",
"input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
"output":[["DOCTYPE", "HtMl", null, "XyZ", true]]},
{"description":"Doctype system case-sensitivity (2)",
"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
"output":[["DOCTYPE", "hTmL", null, "xYz", true]]},
{"description":"U+0000 in lookahead region after non-matching character",
"input":"<!doc>\u0000",
"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\uFFFD"]],
"ignoreErrorOrder":true},
{"description":"U+0000 in lookahead region",
"input":"<!doc\u0000",
"output":["ParseError", "ParseError", ["Comment", "doc\uFFFD"]],
"ignoreErrorOrder":true},
{"description":"CR followed by U+0000",
"input":"\r\u0000",
"output":["ParseError", ["Character", "\n\uFFFD"]],
"ignoreErrorOrder":true},
{"description":"CR followed by non-LF",
"input":"\r?",
"output":[["Character", "\n?"]]},
{"description":"CR at EOF",
"input":"\r",
"output":[["Character", "\n"]]},
{"description":"LF at EOF",
"input":"\n",
"output":[["Character", "\n"]]},
{"description":"CR LF",
"input":"\r\n",
"output":[["Character", "\n"]]},
{"description":"CR CR",
"input":"\r\r",
"output":[["Character", "\n\n"]]},
{"description":"LF LF",
"input":"\n\n",
"output":[["Character", "\n\n"]]},
{"description":"LF CR",
"input":"\n\r",
"output":[["Character", "\n\n"]]},
{"description":"text CR CR CR text",
"input":"text\r\r\rtext",
"output":[["Character", "text\n\n\ntext"]]},
{"description":"Doctype publik",
"input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Doctype publi",
"input":"<!DOCTYPE html PUBLI",
"output":["ParseError", "ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Doctype sistem",
"input":"<!DOCTYPE html SISTEM \"AbC\">",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Doctype sys",
"input":"<!DOCTYPE html SYS",
"output":["ParseError", "ParseError", ["DOCTYPE", "html", null, null, false]]}
]}