instiki/attic/vendor/plugins/HTML5lib/testdata/tokenizer/test2.test

{"tests": [

{"description":"DOCTYPE without name",
"input":"<!DOCTYPE>",
"output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},

{"description":"DOCTYPE without space before name",
"input":"<!DOCTYPEhtml>",
"output":["ParseError", ["DOCTYPE", "html", null, null, true]]},

{"description":"Incorrect DOCTYPE without a space before name",
"input":"<!DOCTYPEfoo>",
"output":["ParseError", ["DOCTYPE", "foo", null, null, true]]},

{"description":"DOCTYPE with publicId",
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},

{"description":"DOCTYPE with EOF after PUBLIC",
"input":"<!DOCTYPE html PUBLIC",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},

{"description":"DOCTYPE with EOF after PUBLIC '",
"input":"<!DOCTYPE html PUBLIC '",
"output":["ParseError", ["DOCTYPE", "html", "", null, false]]},

{"description":"DOCTYPE with EOF after PUBLIC 'x",
"input":"<!DOCTYPE html PUBLIC 'x",
"output":["ParseError", ["DOCTYPE", "html", "x", null, false]]},

{"description":"DOCTYPE with systemId",
"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},

{"description":"DOCTYPE with publicId and systemId",
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},

{"description":"Incomplete doctype",
"input":"<!DOCTYPE html ",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},

{"description":"Numeric entity representing the NUL character",
"input":"&#0000;",
"output":["ParseError", ["Character", "\uFFFD"]]},

{"description":"Hexadecimal entity representing the NUL character",
"input":"&#x0000;",
"output":["ParseError", ["Character", "\uFFFD"]]},

{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
"input":"&#2225222;",
"output":["ParseError", ["Character", "\uFFFD"]]},

{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
"input":"&#x1010FFFF;",
"output":["ParseError", ["Character", "\uFFFD"]]},

{"description":"Hexadecimal entity pair representing a surrogate pair",
"input":"&#xD869;&#xDED6;",
"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},

{"description":"Hexadecimal entity with mixed uppercase and lowercase",
"input":"&#xaBcD;",
"output":[["Character", "\uABCD"]]},

{"description":"Entity without a name",
"input":"&;",
"output":["ParseError", ["Character", "&;"]]},

{"description":"Unescaped ampersand in attribute value",
"input":"<h a='&'>",
"output":["ParseError", ["StartTag", "h", { "a":"&" }]]},

{"description":"StartTag containing <",
"input":"<a<b>",
"output":[["StartTag", "a<b", { }]]},

{"description":"Non-void element containing trailing /",
"input":"<h/>",
"output":["ParseError", ["StartTag", "h", { }]]},

{"description":"Void element with permitted slash",
"input":"<br/>",
"output":[["StartTag", "br", { }]]},

{"description":"StartTag containing /",
"input":"<h/a='b'>",
"output":["ParseError", ["StartTag", "h", { "a":"b" }]]},

{"description":"Double-quoted attribute value",
"input":"<h a=\"b\">",
"output":[["StartTag", "h", { "a":"b" }]]},

{"description":"Unescaped </",
"input":"</",
"output":["ParseError", ["Character", "</"]]},

{"description":"Illegal end tag name",
"input":"</1>",
"output":["ParseError", ["Comment", "1"]]},

{"description":"Simili processing instruction",
"input":"<?namespace>",
"output":["ParseError", ["Comment", "?namespace"]]},

{"description":"A bogus comment stops at >, even if preceeded by two dashes",
"input":"<?foo-->",
"output":["ParseError", ["Comment", "?foo--"]]},

{"description":"Unescaped <",
"input":"foo < bar",
"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},

{"description":"Null Byte Replacement",
"input":"\u0000",
"output":["ParseError", ["Character", "\ufffd"]]},

{"description":"Comment with dash",
"input":"<!---x",
"output":["ParseError", ["Comment", "-x"]]},

{"description":"Entity + newline",
"input":"\nx\n&gt;\n",
"output":[["Character","\nx\n>\n"]]}

]}