a6429f8c22
Completely removed the html5lib sanitizer. Fixed the string-handling to work in both Ruby 1.8.x and 1.9.2. There are still, inexplicably, two functional tests that fail. But the rest seems to work quite well.
172 lines
4.7 KiB
Text
172 lines
4.7 KiB
Text
{"tests": [
|
|
|
|
{"description":"Correct Doctype lowercase",
|
|
"input":"<!DOCTYPE html>",
|
|
"output":[["DOCTYPE", "html", null, null, true]]},
|
|
|
|
{"description":"Correct Doctype uppercase",
|
|
"input":"<!DOCTYPE HTML>",
|
|
"output":[["DOCTYPE", "HTML", null, null, true]]},
|
|
|
|
{"description":"Correct Doctype mixed case",
|
|
"input":"<!DOCTYPE HtMl>",
|
|
"output":[["DOCTYPE", "HtMl", null, null, true]]},
|
|
|
|
{"description":"Truncated doctype start",
|
|
"input":"<!DOC>",
|
|
"output":["ParseError", ["Comment", "DOC"]]},
|
|
|
|
{"description":"Doctype in error",
|
|
"input":"<!DOCTYPE foo>",
|
|
"output":[["DOCTYPE", "foo", null, null, true]]},
|
|
|
|
{"description":"Single Start Tag",
|
|
"input":"<h>",
|
|
"output":[["StartTag", "h", {}]]},
|
|
|
|
{"description":"Empty end tag",
|
|
"input":"</>",
|
|
"output":["ParseError"]},
|
|
|
|
{"description":"Empty start tag",
|
|
"input":"<>",
|
|
"output":["ParseError", ["Character", "<>"]]},
|
|
|
|
{"description":"Start Tag w/attribute",
|
|
"input":"<h a='b'>",
|
|
"output":[["StartTag", "h", {"a":"b"}]]},
|
|
|
|
{"description":"Start Tag w/attribute no quotes",
|
|
"input":"<h a=b>",
|
|
"output":[["StartTag", "h", {"a":"b"}]]},
|
|
|
|
{"description":"Start/End Tag",
|
|
"input":"<h></h>",
|
|
"output":[["StartTag", "h", {}], ["EndTag", "h"]]},
|
|
|
|
{"description":"Two unclosed start tags",
|
|
"input":"<p>One<p>Two",
|
|
"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
|
|
|
|
{"description":"End Tag w/attribute",
|
|
"input":"<h></h a='b'>",
|
|
"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]},
|
|
|
|
{"description":"Multiple atts",
|
|
"input":"<h a='b' c='d'>",
|
|
"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
|
|
|
|
{"description":"Multiple atts no space",
|
|
"input":"<h a='b'c='d'>",
|
|
"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
|
|
|
|
{"description":"Repeated attr",
|
|
"input":"<h a='b' a='d'>",
|
|
"output":["ParseError", ["StartTag", "h", {"a":"b"}]]},
|
|
|
|
{"description":"Simple comment",
|
|
"input":"<!--comment-->",
|
|
"output":[["Comment", "comment"]]},
|
|
|
|
{"description":"Comment, Central dash no space",
|
|
"input":"<!----->",
|
|
"output":["ParseError", ["Comment", "-"]]},
|
|
|
|
{"description":"Comment, two central dashes",
|
|
"input":"<!-- --comment -->",
|
|
"output":["ParseError", ["Comment", " --comment "]]},
|
|
|
|
{"description":"Unfinished comment",
|
|
"input":"<!--comment",
|
|
"output":["ParseError", ["Comment", "comment"]]},
|
|
|
|
{"description":"Start of a comment",
|
|
"input":"<!-",
|
|
"output":["ParseError", ["Comment", "-"]]},
|
|
|
|
{"description":"Short comment",
|
|
"input":"<!-->",
|
|
"output":["ParseError", ["Comment", ""]]},
|
|
|
|
{"description":"Short comment two",
|
|
"input":"<!--->",
|
|
"output":["ParseError", ["Comment", ""]]},
|
|
|
|
{"description":"Short comment three",
|
|
"input":"<!---->",
|
|
"output":[["Comment", ""]]},
|
|
|
|
|
|
{"description":"Ampersand EOF",
|
|
"input":"&",
|
|
"output":[["Character", "&"]]},
|
|
|
|
{"description":"Ampersand ampersand EOF",
|
|
"input":"&&",
|
|
"output":[["Character", "&&"]]},
|
|
|
|
{"description":"Ampersand space EOF",
|
|
"input":"& ",
|
|
"output":[["Character", "& "]]},
|
|
|
|
{"description":"Unfinished entity",
|
|
"input":"&f",
|
|
"output":["ParseError", ["Character", "&f"]]},
|
|
|
|
{"description":"Ampersand, number sign",
|
|
"input":"&#",
|
|
"output":["ParseError", ["Character", "&#"]]},
|
|
|
|
{"description":"Unfinished numeric entity",
|
|
"input":"&#x",
|
|
"output":["ParseError", ["Character", "&#x"]]},
|
|
|
|
{"description":"Entity with trailing semicolon (1)",
|
|
"input":"I'm ¬it",
|
|
"output":[["Character","I'm ¬it"]]},
|
|
|
|
{"description":"Entity with trailing semicolon (2)",
|
|
"input":"I'm ∉",
|
|
"output":[["Character","I'm ∉"]]},
|
|
|
|
{"description":"Entity without trailing semicolon (1)",
|
|
"input":"I'm ¬it",
|
|
"output":[["Character","I'm "], "ParseError", ["Character", "¬it"]]},
|
|
|
|
{"description":"Entity without trailing semicolon (2)",
|
|
"input":"I'm ¬in",
|
|
"output":[["Character","I'm "], "ParseError", ["Character", "¬in"]]},
|
|
|
|
{"description":"Partial entity match at end of file",
|
|
"input":"I'm &no",
|
|
"output":[["Character","I'm "], "ParseError", ["Character", "&no"]]},
|
|
|
|
{"description":"ASCII decimal entity",
|
|
"input":"$",
|
|
"output":[["Character","$"]]},
|
|
|
|
{"description":"ASCII hexadecimal entity",
|
|
"input":"?",
|
|
"output":[["Character","?"]]},
|
|
|
|
{"description":"Hexadecimal entity in attribute",
|
|
"input":"<h a='?'></h>",
|
|
"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
|
|
|
|
{"description":"Entity in attribute without semicolon ending in x",
|
|
"input":"<h a='¬x'>",
|
|
"output":["ParseError", ["StartTag", "h", {"a":"¬x"}]]},
|
|
|
|
{"description":"Entity in attribute without semicolon ending in 1",
|
|
"input":"<h a='¬1'>",
|
|
"output":["ParseError", ["StartTag", "h", {"a":"¬1"}]]},
|
|
|
|
{"description":"Entity in attribute without semicolon ending in i",
|
|
"input":"<h a='¬i'>",
|
|
"output":["ParseError", ["StartTag", "h", {"a":"¬i"}]]},
|
|
|
|
{"description":"Entity in attribute without semicolon",
|
|
"input":"<h a='©'>",
|
|
"output":["ParseError", ["StartTag", "h", {"a":"©"}]]}
|
|
|
|
]}
|