Sync with latest HTML5lib

This commit is contained in:
Jacques Distler 2007-06-11 23:33:06 -05:00
parent c2bfdefa57
commit 0ddd422059
25 changed files with 39581 additions and 302 deletions

View file

@ -0,0 +1,36 @@
{"tests": [
{"description":"PLAINTEXT content model flag",
"contentModelFlags":["PLAINTEXT"],
"input":"<head>&body;",
"output":[["Character", "<head>&body;"]]},
{"description":"End tag closing RCDATA or CDATA",
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"bar",
"input":"foo</bar>",
"output":[["Character", "foo"], ["EndTag", "bar"]]},
{"description":"End tag with incorrect name in RCDATA or CDATA",
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"baz",
"input":"</foo>bar</baz>",
"output":["ParseError", ["Character", "</foo>bar"], ["EndTag", "baz"]]},
{"description":"End tag closing RCDATA or CDATA, switching back to PCDATA",
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"bar",
"input":"foo</bar></baz>",
"output":[["Character", "foo"], ["EndTag", "bar"], ["EndTag", "baz"]]},
{"description":"CDATA w/ something looking like an entity",
"contentModelFlags":["CDATA"],
"input":"&foo;",
"output":[["Character", "&foo;"]]},
{"description":"RCDATA w/ an entity",
"contentModelFlags":["RCDATA"],
"input":"&lt;",
"output":[["Character", "<"]]}
]}

View file

@ -0,0 +1,136 @@
{"tests": [
{"description":"Correct Doctype lowercase",
"input":"<!DOCTYPE html>",
"output":[["DOCTYPE", "HTML", false]]},
{"description":"Correct Doctype uppercase",
"input":"<!DOCTYPE HTML>",
"output":[["DOCTYPE", "HTML", false]]},
{"description":"Correct Doctype mixed case",
"input":"<!DOCTYPE HtMl>",
"output":[["DOCTYPE", "HTML", false]]},
{"description":"Truncated doctype start",
"input":"<!DOC>",
"output":["ParseError", ["Comment", "DOC"]]},
{"description":"Doctype in error",
"input":"<!DOCTYPE foo>",
"output":[["DOCTYPE", "FOO", true]]},
{"description":"Single Start Tag",
"input":"<h>",
"output":[["StartTag", "h", {}]]},
{"description":"Empty end tag",
"input":"</>",
"output":["ParseError"]},
{"description":"Empty start tag",
"input":"<>",
"output":["ParseError", ["Character", "<>"]]},
{"description":"Start Tag w/attribute",
"input":"<h a='b'>",
"output":[["StartTag", "h", {"a":"b"}]]},
{"description":"Start Tag w/attribute no quotes",
"input":"<h a=b>",
"output":[["StartTag", "h", {"a":"b"}]]},
{"description":"Start/End Tag",
"input":"<h></h>",
"output":[["StartTag", "h", {}], ["EndTag", "h"]]},
{"description":"Two unclosed start tags",
"input":"<p>One<p>Two",
"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
{"description":"End Tag w/attribute",
"input":"<h></h a='b'>",
"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]},
{"description":"Multiple atts",
"input":"<h a='b' c='d'>",
"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
{"description":"Multiple atts no space",
"input":"<h a='b'c='d'>",
"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
{"description":"Repeated attr",
"input":"<h a='b' a='d'>",
"output":["ParseError", ["StartTag", "h", {"a":"b"}]]},
{"description":"Simple comment",
"input":"<!--comment-->",
"output":[["Comment", "comment"]]},
{"description":"Comment, Central dash no space",
"input":"<!----->",
"output":["ParseError", ["Comment", "-"]]},
{"description":"Comment, two central dashes",
"input":"<!-- --comment -->",
"output":["ParseError", ["Comment", " --comment "]]},
{"description":"Unfinished comment",
"input":"<!--comment",
"output":["ParseError", ["Comment", "comment"]]},
{"description":"Start of a comment",
"input":"<!-",
"output":["ParseError", ["Comment", "-"]]},
{"description":"Ampersand only",
"input":"&",
"output":["ParseError", ["Character", "&"]]},
{"description":"Unfinished entity",
"input":"&f",
"output":["ParseError", ["Character", "&"], ["Character", "f"]]},
{"description":"Ampersand, number sign",
"input":"&#",
"output":["ParseError", ["Character", "&"], ["Character", "#"]]},
{"description":"Unfinished numeric entity",
"input":"&#x",
"output":["ParseError", ["Character", "&#x"]]},
{"description":"Entity with trailing semicolon (1)",
"input":"I'm &not;it",
"output":[["Character","I'm ¬it"]]},
{"description":"Entity with trailing semicolon (2)",
"input":"I'm &notin;",
"output":[["Character","I'm ∉"]]},
{"description":"Entity without trailing semicolon (1)",
"input":"I'm &notit",
"output":[["Character","I'm "], "ParseError", ["Character", "¬"],
["Character", "it"]]},
{"description":"Entity without trailing semicolon (2)",
"input":"I'm &notin",
"output":[["Character","I'm "], "ParseError", ["Character", "∉"]]},
{"description":"Partial entity match at end of file",
"input":"I'm &no",
"output":[["Character","I'm "], "ParseError", ["Character", "&no"]]},
{"description":"ASCII decimal entity",
"input":"&#0036;",
"output":[["Character","$"]]},
{"description":"ASCII hexadecimal entity",
"input":"&#x3f;",
"output":[["Character","?"]]},
{"description":"Hexadecimal entity in attribute",
"input":"<h a='&#x3f;'></h>",
"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]}
]}

View file

@ -0,0 +1,105 @@
{"tests": [
{"description":"Doctype without a name",
"input":"<!DOCTYPE>",
"output":["ParseError", "ParseError", ["DOCTYPE", "", true]]},
{"description":"Correct doctype without a space before name",
"input":"<!DOCTYPEhtml>",
"output":["ParseError", ["DOCTYPE", "HTML", false]]},
{"description":"Incorrect doctype without a space before name",
"input":"<!DOCTYPEfoo>",
"output":["ParseError", ["DOCTYPE", "FOO", true]]},
{"description":"Bogus doctype",
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":["ParseError", ["DOCTYPE", "HTML", true]]},
{"description":"Incomplete doctype",
"input":"<!DOCTYPE html ",
"output":["ParseError", ["DOCTYPE", "HTML", true]]},
{"description":"Numeric entity representing the NUL character",
"input":"&#0000;",
"output":[["Character", "\uFFFD"]]},
{"description":"Hexadecimal entity representing the NUL character",
"input":"&#x0000;",
"output":[["Character", "\uFFFD"]]},
{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
"input":"&#2225222;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
"input":"&#x1010FFFF;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Numeric entity representing a Windows-1252 'codepoint'",
"input":"&#137;",
"output":[["Character", "\u2030"]]},
{"description":"Hexadecimal entity representing a Windows-1252 'codepoint'",
"input":"&#x89;",
"output":[["Character", "\u2030"]]},
{"description":"Hexadecimal entity with mixed uppercase and lowercase",
"input":"&#xaBcD;",
"output":[["Character", "\uABCD"]]},
{"description":"Entity without a name",
"input":"&;",
"output":["ParseError", ["Character", "&;"]]},
{"description":"Unescaped ampersand in attribute value",
"input":"<h a='&'>",
"output":["ParseError", ["StartTag", "h", { "a":"&" }]]},
{"description":"StartTag containing <",
"input":"<a<b>",
"output":["ParseError", ["StartTag", "a", { }], ["StartTag", "b", { }]]},
{"description":"Non-void element containing trailing /",
"input":"<h/>",
"output":["ParseError", ["StartTag", "h", { }]]},
{"description":"Void element with permitted slash",
"input":"<br/>",
"output":[["StartTag", "br", { }]]},
{"description":"StartTag containing /",
"input":"<h/a='b'>",
"output":["ParseError", ["StartTag", "h", { "a":"b" }]]},
{"description":"Double-quoted attribute value",
"input":"<h a=\"b\">",
"output":[["StartTag", "h", { "a":"b" }]]},
{"description":"Unescaped </",
"input":"</",
"output":["ParseError", ["Character", "</"]]},
{"description":"Illegal end tag name",
"input":"</1>",
"output":["ParseError", ["Comment", "1"]]},
{"description":"Simili processing instruction",
"input":"<?namespace>",
"output":["ParseError", ["Comment", "?namespace"]]},
{"description":"A bogus comment stops at >, even if preceeded by two dashes",
"input":"<?foo-->",
"output":["ParseError", ["Comment", "?foo--"]]},
{"description":"Unescaped <",
"input":"foo < bar",
"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},
{"description":"Null Byte Replacement",
"input":"\u0000",
"output":[["Character", "\ufffd"]]}
]}