a6429f8c22
Completely removed the html5lib sanitizer. Fixed the string-handling to work in both Ruby 1.8.x and 1.9.2. There are still, inexplicably, two functional tests that fail. But the rest seems to work quite well.
130 lines
4.3 KiB
Plaintext
130 lines
4.3 KiB
Plaintext
{"tests": [
|
|
|
|
{"description":"DOCTYPE without name",
|
|
"input":"<!DOCTYPE>",
|
|
"output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},
|
|
|
|
{"description":"DOCTYPE without space before name",
|
|
"input":"<!DOCTYPEhtml>",
|
|
"output":["ParseError", ["DOCTYPE", "html", null, null, true]]},
|
|
|
|
{"description":"Incorrect DOCTYPE without a space before name",
|
|
"input":"<!DOCTYPEfoo>",
|
|
"output":["ParseError", ["DOCTYPE", "foo", null, null, true]]},
|
|
|
|
{"description":"DOCTYPE with publicId",
|
|
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
|
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
|
|
|
|
{"description":"DOCTYPE with EOF after PUBLIC",
|
|
"input":"<!DOCTYPE html PUBLIC",
|
|
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
|
|
|
{"description":"DOCTYPE with EOF after PUBLIC '",
|
|
"input":"<!DOCTYPE html PUBLIC '",
|
|
"output":["ParseError", ["DOCTYPE", "html", "", null, false]]},
|
|
|
|
{"description":"DOCTYPE with EOF after PUBLIC 'x",
|
|
"input":"<!DOCTYPE html PUBLIC 'x",
|
|
"output":["ParseError", ["DOCTYPE", "html", "x", null, false]]},
|
|
|
|
{"description":"DOCTYPE with systemId",
|
|
"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
|
"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
|
|
|
|
{"description":"DOCTYPE with publicId and systemId",
|
|
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
|
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
|
|
|
|
{"description":"Incomplete doctype",
|
|
"input":"<!DOCTYPE html ",
|
|
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
|
|
|
{"description":"Numeric entity representing the NUL character",
|
|
"input":"&#0000;",
|
|
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"Hexadecimal entity representing the NUL character",
|
|
"input":"&#x0000;",
|
|
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
|
|
"input":"&#2225222;",
|
|
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
|
|
"input":"&#x1010FFFF;",
|
|
"output":["ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"Hexadecimal entity pair representing a surrogate pair",
|
|
"input":"&#xD869;&#xDED6;",
|
|
"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},
|
|
|
|
{"description":"Hexadecimal entity with mixed uppercase and lowercase",
|
|
"input":"&#xaBcD;",
|
|
"output":[["Character", "\uABCD"]]},
|
|
|
|
{"description":"Entity without a name",
|
|
"input":"&;",
|
|
"output":["ParseError", ["Character", "&;"]]},
|
|
|
|
{"description":"Unescaped ampersand in attribute value",
|
|
"input":"<h a='&'>",
|
|
"output":["ParseError", ["StartTag", "h", { "a":"&" }]]},
|
|
|
|
{"description":"StartTag containing <",
|
|
"input":"<a<b>",
|
|
"output":[["StartTag", "a<b", { }]]},
|
|
|
|
{"description":"Non-void element containing trailing /",
|
|
"input":"<h/>",
|
|
"output":["ParseError", ["StartTag", "h", { }]]},
|
|
|
|
{"description":"Void element with permitted slash",
|
|
"input":"<br/>",
|
|
"output":[["StartTag", "br", { }]]},
|
|
|
|
{"description":"StartTag containing /",
|
|
"input":"<h/a='b'>",
|
|
"output":["ParseError", ["StartTag", "h", { "a":"b" }]]},
|
|
|
|
{"description":"Double-quoted attribute value",
|
|
"input":"<h a=\"b\">",
|
|
"output":[["StartTag", "h", { "a":"b" }]]},
|
|
|
|
{"description":"Unescaped </",
|
|
"input":"</",
|
|
"output":["ParseError", ["Character", "</"]]},
|
|
|
|
{"description":"Illegal end tag name",
|
|
"input":"</1>",
|
|
"output":["ParseError", ["Comment", "1"]]},
|
|
|
|
{"description":"Simili processing instruction",
|
|
"input":"<?namespace>",
|
|
"output":["ParseError", ["Comment", "?namespace"]]},
|
|
|
|
{"description":"A bogus comment stops at >, even if preceded by two dashes",
|
|
"input":"<?foo-->",
|
|
"output":["ParseError", ["Comment", "?foo--"]]},
|
|
|
|
{"description":"Unescaped <",
|
|
"input":"foo < bar",
|
|
"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},
|
|
|
|
{"description":"Null Byte Replacement",
|
|
"input":"\u0000",
|
|
"output":["ParseError", ["Character", "\ufffd"]]},
|
|
|
|
{"description":"Comment with dash",
|
|
"input":"<!---x",
|
|
"output":["ParseError", ["Comment", "-x"]]},
|
|
|
|
{"description":"Entity + newline",
|
|
"input":"\nx\n&gt;\n",
|
|
"output":[["Character","\nx\n>\n"]]}
|
|
|
|
]}
|
|
|
|
|