Sync with latest HTML5lib

This commit is contained in:
Jacques Distler 2007-06-22 03:12:08 -05:00
parent bf572e295f
commit 8e92e4a3ab
41 changed files with 1334 additions and 564 deletions

View file

@ -15,7 +15,7 @@
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"baz",
"input":"</foo>bar</baz>",
"output":["ParseError", ["Character", "</foo>bar"], ["EndTag", "baz"]]},
"output":[["Character", "</foo>bar"], ["EndTag", "baz"]]},
{"description":"End tag closing RCDATA or CDATA, switching back to PCDATA",
"contentModelFlags":["RCDATA", "CDATA"],

View file

@ -0,0 +1,21 @@
{"tests": [
{"description":"Commented close tag in [R]CDATA",
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"bar",
"input":"foo<!--</bar>--></bar>",
"output":[["Character", "foo<!--</bar>-->"], ["EndTag", "bar"]]},
{"description":"Bogus comment in [R]CDATA",
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"bar",
"input":"foo<!-->baz</bar>",
"output":[["Character", "foo<!-->baz"], ["EndTag", "bar"]]},
{"description":"End tag surrounded by bogus comment in [R]CDATA",
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"bar",
"input":"foo<!--></bar><!-->baz</bar>",
"output":[["Character", "foo<!-->"], ["EndTag", "bar"], "ParseError", ["Comment", ""], ["Character", "baz"], ["EndTag", "bar"]]}
]}

View file

@ -2,15 +2,15 @@
{"description":"Correct Doctype lowercase",
"input":"<!DOCTYPE html>",
"output":[["DOCTYPE", "HTML", false]]},
"output":[["DOCTYPE", "html", null, null, true]]},
{"description":"Correct Doctype uppercase",
"input":"<!DOCTYPE HTML>",
"output":[["DOCTYPE", "HTML", false]]},
"input":"<!DOCTYPE HTML>",
"output":[["DOCTYPE", "HTML", null, null, true]]},
{"description":"Correct Doctype mixed case",
"input":"<!DOCTYPE HtMl>",
"output":[["DOCTYPE", "HTML", false]]},
"output":[["DOCTYPE", "HtMl", null, null, true]]},
{"description":"Truncated doctype start",
"input":"<!DOC>",
@ -18,7 +18,7 @@
{"description":"Doctype in error",
"input":"<!DOCTYPE foo>",
"output":[["DOCTYPE", "FOO", true]]},
"output":[["DOCTYPE", "foo", null, null, true]]},
{"description":"Single Start Tag",
"input":"<h>",
@ -84,17 +84,38 @@
"input":"<!-",
"output":["ParseError", ["Comment", "-"]]},
{"description":"Ampersand only",
{"description":"Short comment",
"input":"<!-->",
"output":["ParseError", ["Comment", ""]]},
{"description":"Short comment two",
"input":"<!--->",
"output":["ParseError", ["Comment", ""]]},
{"description":"Short comment three",
"input":"<!---->",
"output":[["Comment", ""]]},
{"description":"Ampersand EOF",
"input":"&",
"output":["ParseError", ["Character", "&"]]},
"output":[["Character", "&"]]},
{"description":"Ampersand ampersand EOF",
"input":"&&",
"output":[["Character", "&&"]]},
{"description":"Ampersand space EOF",
"input":"& ",
"output":[["Character", "& "]]},
{"description":"Unfinished entity",
"input":"&f",
"output":["ParseError", ["Character", "&"], ["Character", "f"]]},
"output":["ParseError", ["Character", "&f"]]},
{"description":"Ampersand, number sign",
"input":"&#",
"output":["ParseError", ["Character", "&"], ["Character", "#"]]},
"output":["ParseError", ["Character", "&#"]]},
{"description":"Unfinished numeric entity",
"input":"&#x",
@ -110,8 +131,7 @@
{"description":"Entity without trailing semicolon (1)",
"input":"I'm &notit",
"output":[["Character","I'm "], "ParseError", ["Character", "¬"],
["Character", "it"]]},
"output":[["Character","I'm "], "ParseError", ["Character", "¬it"]]},
{"description":"Entity without trailing semicolon (2)",
"input":"I'm &notin",

View file

@ -1,24 +1,44 @@
{"tests": [
{"description":"Doctype without a name",
{"description":"DOCTYPE without name",
"input":"<!DOCTYPE>",
"output":["ParseError", "ParseError", ["DOCTYPE", "", true]]},
"output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},
{"description":"Correct doctype without a space before name",
{"description":"DOCTYPE without space before name",
"input":"<!DOCTYPEhtml>",
"output":["ParseError", ["DOCTYPE", "HTML", false]]},
"output":["ParseError", ["DOCTYPE", "html", null, null, true]]},
{"description":"Incorrect doctype without a space before name",
{"description":"Incorrect DOCTYPE without a space before name",
"input":"<!DOCTYPEfoo>",
"output":["ParseError", ["DOCTYPE", "FOO", true]]},
"output":["ParseError", ["DOCTYPE", "foo", null, null, true]]},
{"description":"Bogus doctype",
{"description":"DOCTYPE with publicId",
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":["ParseError", ["DOCTYPE", "HTML", true]]},
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
{"description":"DOCTYPE with EOF after PUBLIC",
"input":"<!DOCTYPE html PUBLIC",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"DOCTYPE with EOF after PUBLIC '",
"input":"<!DOCTYPE html PUBLIC '",
"output":["ParseError", ["DOCTYPE", "html", "", null, false]]},
{"description":"DOCTYPE with EOF after PUBLIC 'x",
"input":"<!DOCTYPE html PUBLIC 'x",
"output":["ParseError", ["DOCTYPE", "html", "x", null, false]]},
{"description":"DOCTYPE with systemId",
"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
{"description":"DOCTYPE with publicId and systemId",
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
{"description":"Incomplete doctype",
"input":"<!DOCTYPE html ",
"output":["ParseError", ["DOCTYPE", "HTML", true]]},
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Numeric entity representing the NUL character",
"input":"&#0000;",
@ -30,19 +50,19 @@
{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
"input":"&#2225222;",
"output":["ParseError", ["Character", "\uFFFD"]]},
"output":[["Character", "\uFFFD"]]},
{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
"input":"&#x1010FFFF;",
"output":["ParseError", ["Character", "\uFFFD"]]},
"output":[["Character", "\uFFFD"]]},
{"description":"Numeric entity representing a Windows-1252 'codepoint'",
"input":"&#137;",
"output":[["Character", "\u2030"]]},
"output":["ParseError", ["Character", "\u2030"]]},
{"description":"Hexadecimal entity representing a Windows-1252 'codepoint'",
"input":"&#x89;",
"output":[["Character", "\u2030"]]},
"output":["ParseError", ["Character", "\u2030"]]},
{"description":"Hexadecimal entity with mixed uppercase and lowercase",
"input":"&#xaBcD;",
@ -58,7 +78,7 @@
{"description":"StartTag containing <",
"input":"<a<b>",
"output":["ParseError", ["StartTag", "a", { }], ["StartTag", "b", { }]]},
"output":[["StartTag", "a<b", { }]]},
{"description":"Non-void element containing trailing /",
"input":"<h/>",