Sync with latest HTML5lib

This commit is contained in:
Jacques Distler 2007-06-11 23:33:06 -05:00
parent c2bfdefa57
commit 0ddd422059
25 changed files with 39581 additions and 302 deletions

View file

@ -181,7 +181,7 @@ module HTML5lib
end
elsif token[:type] == :EndTag
parseError(_('End tag contains unexpected attributes.')) if token[:data]
parseError(_('End tag contains unexpected attributes.')) unless token[:data].empty?
token[:name] = token[:name].downcase
end

View file

@ -0,0 +1,51 @@
老子《道德經》 第一~四十章
老子道經
第一章
道可道,非常道。名可名,非常名。無,名天地之始﹔有,名萬物之母。
故常無,欲以觀其妙;常有,欲以觀其徼。此兩者,同出而異名,同謂之
玄。玄之又玄,眾妙之門。
第二章
天下皆知美之為美,斯惡矣﹔皆知善之為善,斯不善矣。故有無相生,難
易相成,長短相形,高下相傾,音聲相和,前後相隨。是以聖人處「無為
」之事,行「不言」之教。萬物作焉而不辭,生而不有,為而不恃,功成
而弗居。夫唯弗居,是以不去。
第三章
不尚賢,使民不爭﹔不貴難得之貨,使民不為盜﹔不見可欲,使民心不亂
。是以「聖人」之治,虛其心,實其腹,弱其志,強其骨。常使民無知無
欲。使夫智者不敢為也。為「無為」,則無不治。
第四章
「道」沖,而用之或不盈。淵兮,似萬物之宗﹔挫其銳,解其紛,和其光
,同其塵﹔湛兮似或存。吾不知誰之子?象帝之先。
第五章
天地不仁,以萬物為芻狗﹔聖人不仁,以百姓為芻狗。天地之間,其猶橐
蘥乎?虛而不屈,動而愈出。多言數窮,不如守中。
第六章
谷神不死,是謂玄牝。玄牝之門,是謂天地根。綿綿若存,用之不勤。
第七章
天長地久。天地所以能長且久者,以其不自生,故能長久。是以聖人後其
身而身先,外其身而身存。非以其無私邪?故能成其私。
第八章
上善若水。水善利萬物而不爭。處眾人之所惡,故幾於道。居善地,心善
淵,與善仁,言善信,政善治,事善能,動善時。夫唯不爭,故無尤。
第九章
持而盈之,不如其已﹔揣而銳之,不可長保。金玉滿堂,莫之能守﹔富貴
而驕,自遺其咎。功遂身退,天之道。

View file

@ -0,0 +1,10 @@
#data
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=euc-jp">
<!--京-->
<title>Yahoo! JAPAN</title>
<meta name="description" content="日本最大級のポータルサイト。検索、オークション、ニュース、メール、コミュニティ、ショッピング、など80以上のサービスを展開。あなたの生活をより豊かにする「ライフ・エンジン」を目指していきます。">
<style type="text/css" media="all">
#encoding
euc-jp

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,82 @@
#data
<meta
#encoding
windows-1252
#data
<
#encoding
windows-1252
#data
<!
#encoding
windows-1252
#data
<meta charset = "
#encoding
windows-1252
#data
<meta charset=EUC-jp
#encoding
windows-1252
#data
<meta <meta charset='EUC-jp'>
#encoding
EUC-jp
#data
<meta charset = 'EUC-jp'>
#encoding
EUC-jp
#data
<!-- -->
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
#encoding
utf-8
#data
<!-- -->
<meta http-equiv="Content-Type" content="text/html; charset=utf
#encoding
windows-1252
#data
<meta http-equiv="Content-Type<meta charset="utf-8">
#encoding
windows-1252
#data
<meta http-equiv="Content-Type" content="text/html; charset='utf-8'">
#encoding
utf-8
#data
<meta http-equiv="Content-Type" content="text/html; charset='utf-8">
#encoding
windows-1252
#data
<meta
#encoding
windows-1252
#data
<meta charset =
#encoding
windows-1252
#data
<meta charset= utf-8
#encoding
windows-1252
#data
<meta content = "text/html;
#encoding
windows-1252

View file

@ -0,0 +1,409 @@
[
{
"name": "IE_Comments",
"input": "<!--[if gte IE 4]><script>alert('XSS');</script><![endif]-->",
"output": ""
},
{
"name": "IE_Comments_2",
"input": "<![if !IE 5]><script>alert('XSS');</script><![endif]>",
"output": "&lt;script&gt;alert('XSS');&lt;/script&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "allow_colons_in_path_component",
"input": "<a href=\"./this:that\">foo</a>",
"output": "<a href='./this:that'>foo</a>"
},
{
"name": "background_attribute",
"input": "<div background=\"javascript:alert('XSS')\"></div>",
"output": "<div/>",
"xhtml": "<div></div>",
"rexml": "<div></div>"
},
{
"name": "bgsound",
"input": "<bgsound src=\"javascript:alert('XSS');\" />",
"output": "&lt;bgsound src=\"javascript:alert('XSS');\"/&gt;",
"rexml": "&lt;bgsound src=\"javascript:alert('XSS');\"&gt;&lt;/bgsound&gt;"
},
{
"name": "div_background_image_unicode_encoded",
"input": "<div style=\"background-image:\a5\a2\006C\0028'\006a\0061\a6\0061\a3\0063\a2\0069\a0\a4\003a\0061\006c\0065\a2\a4\0028.1027\0058.1053\0053\0027\0029'\0029\">foo</div>",
"output": "<div style=''>foo</div>"
},
{
"name": "div_expression",
"input": "<div style=\"width: expression(alert('XSS'));\">foo</div>",
"output": "<div style=''>foo</div>"
},
{
"name": "double_open_angle_brackets",
"input": "<img src=http://ha.ckers.org/scriptlet.html <",
"output": "<img src='http://ha.ckers.org/scriptlet.html'/>&lt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "double_open_angle_brackets_2",
"input": "<script src=http://ha.ckers.org/scriptlet.html <",
"output": "&lt;script src=\"http://ha.ckers.org/scriptlet.html\"&gt;&lt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "grave_accents",
"input": "<img src=`javascript:alert('XSS')` />",
"output": "<img/>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "img_dynsrc_lowsrc",
"input": "<img dynsrc=\"javascript:alert('XSS')\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "img_vbscript",
"input": "<img src='vbscript:msgbox(\"XSS\")' />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "input_image",
"input": "<input type=\"image\" src=\"javascript:alert('XSS');\" />",
"output": "<input type='image'/>",
"rexml": "<input type='image' />"
},
{
"name": "link_stylesheets",
"input": "<link rel=\"stylesheet\" href=\"javascript:alert('XSS');\" />",
"output": "&lt;link rel=\"stylesheet\" href=\"javascript:alert('XSS');\"/&gt;",
"rexml": "&lt;link href=\"javascript:alert('XSS');\" rel=\"stylesheet\"/&gt;"
},
{
"name": "link_stylesheets_2",
"input": "<link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\" />",
"output": "&lt;link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\"/&gt;",
"rexml": "&lt;link href=\"http://ha.ckers.org/xss.css\" rel=\"stylesheet\"/&gt;"
},
{
"name": "list_style_image",
"input": "<li style=\"list-style-image: url(javascript:alert('XSS'))\">foo</li>",
"output": "<li style=''>foo</li>"
},
{
"name": "no_closing_script_tags",
"input": "<script src=http://ha.ckers.org/xss.js?<b>",
"output": "&lt;script src=\"http://ha.ckers.org/xss.js?\"&gt;<b/>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "non_alpha_non_digit",
"input": "<script/XSS src=\"http://ha.ckers.org/xss.js\"></script>",
"output": "&lt;script XSS=\"\" src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "non_alpha_non_digit_2",
"input": "<a onclick!\#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>foo</a>",
"output": "<a>foo</a>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "non_alpha_non_digit_3",
"input": "<img/src=\"http://ha.ckers.org/xss.js\"/>",
"output": "<img src='http://ha.ckers.org/xss.js'/>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "non_alpha_non_digit_II",
"input": "<a href!\#$%&()*~+-_.,:;?@[/|]^`=alert('XSS')>foo</a>",
"output": "<a>foo</a>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "non_alpha_non_digit_III",
"input": "<a/href=\"javascript:alert('XSS');\">foo</a>",
"output": "<a>foo</a>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "platypus",
"input": "<a href=\"http://www.ragingplatypus.com/\" style=\"display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;\">never trust your upstream platypus</a>",
"output": "<a href='http://www.ragingplatypus.com/' style='display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;'>never trust your upstream platypus</a>"
},
{
"name": "protocol_resolution_in_script_tag",
"input": "<script src=//ha.ckers.org/.j></script>",
"output": "&lt;script src=\"//ha.ckers.org/.j\"&gt;&lt;/script&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_allow_anchors",
"input": "<a href='foo' onclick='bar'><script>baz</script></a>",
"output": "<a href='foo'>&lt;script&gt;baz&lt;/script&gt;</a>"
},
{
"name": "should_allow_image_alt_attribute",
"input": "<img alt='foo' onclick='bar' />",
"output": "<img alt='foo'/>",
"rexml": "<img alt='foo' />"
},
{
"name": "should_allow_image_height_attribute",
"input": "<img height='foo' onclick='bar' />",
"output": "<img height='foo'/>",
"rexml": "<img height='foo' />"
},
{
"name": "should_allow_image_src_attribute",
"input": "<img src='foo' onclick='bar' />",
"output": "<img src='foo'/>",
"rexml": "<img src='foo' />"
},
{
"name": "should_allow_image_width_attribute",
"input": "<img width='foo' onclick='bar' />",
"output": "<img width='foo'/>",
"rexml": "<img width='foo' />"
},
{
"name": "should_handle_blank_text",
"input": "",
"output": ""
},
{
"name": "should_handle_malformed_image_tags",
"input": "<img \"\"\"><script>alert(\"XSS\")</script>\">",
"output": "<img/>&lt;script&gt;alert(\"XSS\")&lt;/script&gt;\"&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_handle_non_html",
"input": "abc",
"output": "abc"
},
{
"name": "should_not_fall_for_ridiculous_hack",
"input": "<img\nsrc\n=\n\"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n\"\n />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_0",
"input": "<img src=\"javascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_1",
"input": "<img src=javascript:alert('XSS') />",
"output": "<img/>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_not_fall_for_xss_image_hack_10",
"input": "<img src=\"jav&#x0A;ascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_11",
"input": "<img src=\"jav&#x0D;ascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_12",
"input": "<img src=\" &#14; javascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_13",
"input": "<img src=\"&#x20;javascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_14",
"input": "<img src=\"&#xA0;javascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_2",
"input": "<img src=\"JaVaScRiPt:alert('XSS')\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_3",
"input": "<img src='javascript:alert(&quot;XSS&quot;)' />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_4",
"input": "<img src='javascript:alert(String.fromCharCode(88,83,83))' />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_5",
"input": "<img src='&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;' />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_6",
"input": "<img src='&#0000106;&#0000097;&#0000118;&#0000097;&#0000115;&#0000099;&#0000114;&#0000105;&#0000112;&#0000116;&#0000058;&#0000097;&#0000108;&#0000101;&#0000114;&#0000116;&#0000040;&#0000039;&#0000088;&#0000083;&#0000083;&#0000039;&#0000041' />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_7",
"input": "<img src='&#x6A;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3A;&#x61;&#x6C;&#x65;&#x72;&#x74;&#x28;&#x27;&#x58;&#x53;&#x53;&#x27;&#x29' />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_8",
"input": "<img src=\"jav\tascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_9",
"input": "<img src=\"jav&#x09;ascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_sanitize_half_open_scripts",
"input": "<img src=\"javascript:alert('XSS')\"",
"output": "<img/>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_sanitize_invalid_script_tag",
"input": "<script/XSS SRC=\"http://ha.ckers.org/xss.js\"></script>",
"output": "&lt;script XSS=\"\" SRC=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_sanitize_script_tag_with_multiple_open_brackets",
"input": "<<script>alert(\"XSS\");//<</script>",
"output": "&lt;&lt;script&gt;alert(\"XSS\");//&lt;&lt;/script&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_sanitize_script_tag_with_multiple_open_brackets_2",
"input": "<iframe src=http://ha.ckers.org/scriptlet.html\n<",
"output": "&lt;iframe src=\"http://ha.ckers.org/scriptlet.html\"&gt;&lt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_sanitize_tag_broken_up_by_null",
"input": "<scr\u0000ipt>alert(\"XSS\")</scr\u0000ipt>",
"output": "&lt;scr\ufffdipt&gt;alert(\"XSS\")&lt;/scr\ufffdipt&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_sanitize_unclosed_script",
"input": "<script src=http://ha.ckers.org/xss.js?<b>",
"output": "&lt;script src=\"http://ha.ckers.org/xss.js?\"&gt;<b/>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_strip_href_attribute_in_a_with_bad_protocols",
"input": "<a href=\"javascript:XSS\" title=\"1\">boo</a>",
"output": "<a title='1'>boo</a>"
},
{
"name": "should_strip_href_attribute_in_a_with_bad_protocols_and_whitespace",
"input": "<a href=\" javascript:XSS\" title=\"1\">boo</a>",
"output": "<a title='1'>boo</a>"
},
{
"name": "should_strip_src_attribute_in_img_with_bad_protocols",
"input": "<img src=\"javascript:XSS\" title=\"1\">boo</img>",
"output": "<img title='1'/>boo",
"rexml": "<img title='1' />"
},
{
"name": "should_strip_src_attribute_in_img_with_bad_protocols_and_whitespace",
"input": "<img src=\" javascript:XSS\" title=\"1\">boo</img>",
"output": "<img title='1'/>boo",
"rexml": "<img title='1' />"
},
{
"name": "xml_base",
"input": "<div xml:base=\"javascript:alert('XSS');//\">foo</div>",
"output": "<div>foo</div>"
},
{
"name": "xul",
"input": "<p style=\"-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')\">fubar</p>",
"output": "<p style=''>fubar</p>"
}
]

View file

@ -0,0 +1,88 @@
{"tests": [
{"description": "proper attribute value escaping",
"input": [["StartTag", "span", {"title": "test \"with\" &quot;"}]],
"expected": ["<span title='test \"with\" &amp;quot;'>"]
},
{"description": "proper attribute value non-quoting",
"input": [["StartTag", "span", {"title": "foo"}]],
"expected": ["<span title=foo>"]
},
{"description": "proper attribute value quoting (with >)",
"input": [["StartTag", "span", {"title": "foo>bar"}]],
"expected": ["<span title=\"foo>bar\">"]
},
{"description": "proper attribute value quoting (with <)",
"input": [["StartTag", "span", {"title": "foo<bar"}]],
"expected": ["<span title=\"foo<bar\">"]
},
{"description": "proper attribute value quoting (with \")",
"input": [["StartTag", "span", {"title": "foo\"bar"}]],
"expected": ["<span title='foo\"bar'>"]
},
{"description": "proper attribute value quoting (with ')",
"input": [["StartTag", "span", {"title": "foo'bar"}]],
"expected": ["<span title=\"foo'bar\">"]
},
{"description": "proper attribute value quoting (with both \" and ')",
"input": [["StartTag", "span", {"title": "foo'bar\"baz"}]],
"expected": ["<span title=\"foo'bar&quot;baz\">"]
},
{"description": "proper attribute value quoting (with space)",
"input": [["StartTag", "span", {"title": "foo bar"}]],
"expected": ["<span title=\"foo bar\">"]
},
{"description": "proper attribute value quoting (with tab)",
"input": [["StartTag", "span", {"title": "foo\tbar"}]],
"expected": ["<span title=\"foo\tbar\">"]
},
{"description": "proper attribute value quoting (with LF)",
"input": [["StartTag", "span", {"title": "foo\nbar"}]],
"expected": ["<span title=\"foo\nbar\">"]
},
{"description": "proper attribute value quoting (with CR)",
"input": [["StartTag", "span", {"title": "foo\rbar"}]],
"expected": ["<span title=\"foo\rbar\">"]
},
{"description": "proper attribute value quoting (with linetab)",
"input": [["StartTag", "span", {"title": "foo\u000Bbar"}]],
"expected": ["<span title=\"foo\u000Bbar\">"]
},
{"description": "proper attribute value quoting (with form feed)",
"input": [["StartTag", "span", {"title": "foo\u000Cbar"}]],
"expected": ["<span title=\"foo\u000Cbar\">"]
},
{"description": "void element (as EmptyTag token)",
"input": [["EmptyTag", "img", {}]],
"expected": ["<img>"]
},
{"description": "void element (as StartTag token)",
"input": [["StartTag", "img", {}]],
"expected": ["<img>"]
},
{"description": "doctype in error",
"input": [["Doctype", "foo"]],
"expected": ["<!DOCTYPE foo>"]
},
{"description": "doctype",
"input": [["Doctype", "HTML"]],
"expected": ["<!DOCTYPE HTML>"]
}
]}

View file

@ -0,0 +1,900 @@
{"tests": [
{"description": "html start-tag followed by text, with attributes",
"input": [["StartTag", "html", {"lang": "en"}], ["Characters", "foo"]],
"expected": ["<html lang=en>foo"]
},
{"description": "html start-tag followed by comment",
"input": [["StartTag", "html", {}], ["Comment", "foo"]],
"expected": ["<html><!--foo-->"]
},
{"description": "html start-tag followed by space character",
"input": [["StartTag", "html", {}], ["Characters", " foo"]],
"expected": ["<html> foo"]
},
{"description": "html start-tag followed by text",
"input": [["StartTag", "html", {}], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "html start-tag followed by start-tag",
"input": [["StartTag", "html", {}], ["StartTag", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "html start-tag followed by end-tag",
"input": [["StartTag", "html", {}], ["EndTag", "foo", {}]],
"expected": ["</foo>"]
},
{"description": "html start-tag at EOF (shouldn't ever happen?!)",
"input": [["StartTag", "html", {}]],
"expected": [""]
},
{"description": "html end-tag followed by comment",
"input": [["EndTag", "html"], ["Comment", "foo"]],
"expected": ["</html><!--foo-->"]
},
{"description": "html end-tag followed by space character",
"input": [["EndTag", "html"], ["Characters", " foo"]],
"expected": ["</html> foo"]
},
{"description": "html end-tag followed by text",
"input": [["EndTag", "html"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "html end-tag followed by start-tag",
"input": [["EndTag", "html"], ["StartTag", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "html end-tag followed by end-tag",
"input": [["EndTag", "html"], ["EndTag", "foo", {}]],
"expected": ["</foo>"]
},
{"description": "html end-tag at EOF",
"input": [["EndTag", "html"]],
"expected": [""]
},
{"description": "head start-tag followed by comment",
"input": [["StartTag", "head", {}], ["Comment", "foo"]],
"expected": ["<head><!--foo-->"]
},
{"description": "head start-tag followed by space character",
"input": [["StartTag", "head", {}], ["Characters", " foo"]],
"expected": ["<head> foo"]
},
{"description": "head start-tag followed by text",
"input": [["StartTag", "head", {}], ["Characters", "foo"]],
"expected": ["<head>foo"]
},
{"description": "head start-tag followed by start-tag",
"input": [["StartTag", "head", {}], ["StartTag", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "head start-tag followed by end-tag",
"input": [["StartTag", "head", {}], ["EndTag", "foo", {}]],
"expected": ["<head></foo>"]
},
{"description": "head start-tag at EOF (shouldn't ever happen?!)",
"input": [["StartTag", "head", {}]],
"expected": ["<head>"]
},
{"description": "head end-tag followed by comment",
"input": [["EndTag", "head"], ["Comment", "foo"]],
"expected": ["</head><!--foo-->"]
},
{"description": "head end-tag followed by space character",
"input": [["EndTag", "head"], ["Characters", " foo"]],
"expected": ["</head> foo"]
},
{"description": "head end-tag followed by text",
"input": [["EndTag", "head"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "head end-tag followed by start-tag",
"input": [["EndTag", "head"], ["StartTag", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "head end-tag followed by end-tag",
"input": [["EndTag", "head"], ["EndTag", "foo", {}]],
"expected": ["</foo>"]
},
{"description": "head end-tag at EOF",
"input": [["EndTag", "head"]],
"expected": [""]
},
{"description": "body start-tag followed by comment",
"input": [["StartTag", "body", {}], ["Comment", "foo"]],
"expected": ["<body><!--foo-->"]
},
{"description": "body start-tag followed by space character",
"input": [["StartTag", "body", {}], ["Characters", " foo"]],
"expected": ["<body> foo"]
},
{"description": "body start-tag followed by text",
"input": [["StartTag", "body", {}], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "body start-tag followed by start-tag",
"input": [["StartTag", "body", {}], ["StartTag", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "body start-tag followed by end-tag",
"input": [["StartTag", "body", {}], ["EndTag", "foo", {}]],
"expected": ["</foo>"]
},
{"description": "body start-tag at EOF (shouldn't ever happen?!)",
"input": [["StartTag", "body", {}]],
"expected": [""]
},
{"description": "body end-tag followed by comment",
"input": [["EndTag", "body"], ["Comment", "foo"]],
"expected": ["</body><!--foo-->"]
},
{"description": "body end-tag followed by space character",
"input": [["EndTag", "body"], ["Characters", " foo"]],
"expected": ["</body> foo"]
},
{"description": "body end-tag followed by text",
"input": [["EndTag", "body"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "body end-tag followed by start-tag",
"input": [["EndTag", "body"], ["StartTag", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "body end-tag followed by end-tag",
"input": [["EndTag", "body"], ["EndTag", "foo", {}]],
"expected": ["</foo>"]
},
{"description": "body end-tag at EOF",
"input": [["EndTag", "body"]],
"expected": [""]
},
{"description": "li end-tag followed by comment",
"input": [["EndTag", "li"], ["Comment", "foo"]],
"expected": ["</li><!--foo-->"]
},
{"description": "li end-tag followed by space character",
"input": [["EndTag", "li"], ["Characters", " foo"]],
"expected": ["</li> foo"]
},
{"description": "li end-tag followed by text",
"input": [["EndTag", "li"], ["Characters", "foo"]],
"expected": ["</li>foo"]
},
{"description": "li end-tag followed by start-tag",
"input": [["EndTag", "li"], ["StartTag", "foo", {}]],
"expected": ["</li><foo>"]
},
{"description": "li end-tag followed by li start-tag",
"input": [["EndTag", "li"], ["StartTag", "li", {}]],
"expected": ["<li>"]
},
{"description": "li end-tag followed by end-tag",
"input": [["EndTag", "li"], ["EndTag", "foo", {}]],
"expected": ["</foo>"]
},
{"description": "li end-tag at EOF",
"input": [["EndTag", "li"]],
"expected": [""]
},
{"description": "dt end-tag followed by comment",
"input": [["EndTag", "dt"], ["Comment", "foo"]],
"expected": ["</dt><!--foo-->"]
},
{"description": "dt end-tag followed by space character",
"input": [["EndTag", "dt"], ["Characters", " foo"]],
"expected": ["</dt> foo"]
},
{"description": "dt end-tag followed by text",
"input": [["EndTag", "dt"], ["Characters", "foo"]],
"expected": ["</dt>foo"]
},
{"description": "dt end-tag followed by start-tag",
"input": [["EndTag", "dt"], ["StartTag", "foo", {}]],
"expected": ["</dt><foo>"]
},
{"description": "dt end-tag followed by dt start-tag",
"input": [["EndTag", "dt"], ["StartTag", "dt", {}]],
"expected": ["<dt>"]
},
{"description": "dt end-tag followed by dd start-tag",
"input": [["EndTag", "dt"], ["StartTag", "dd", {}]],
"expected": ["<dd>"]
},
{"description": "dt end-tag followed by end-tag",
"input": [["EndTag", "dt"], ["EndTag", "foo", {}]],
"expected": ["</dt></foo>"]
},
{"description": "dt end-tag at EOF",
"input": [["EndTag", "dt"]],
"expected": ["</dt>"]
},
{"description": "dd end-tag followed by comment",
"input": [["EndTag", "dd"], ["Comment", "foo"]],
"expected": ["</dd><!--foo-->"]
},
{"description": "dd end-tag followed by space character",
"input": [["EndTag", "dd"], ["Characters", " foo"]],
"expected": ["</dd> foo"]
},
{"description": "dd end-tag followed by text",
"input": [["EndTag", "dd"], ["Characters", "foo"]],
"expected": ["</dd>foo"]
},
{"description": "dd end-tag followed by start-tag",
"input": [["EndTag", "dd"], ["StartTag", "foo", {}]],
"expected": ["</dd><foo>"]
},
{"description": "dd end-tag followed by dd start-tag",
"input": [["EndTag", "dd"], ["StartTag", "dd", {}]],
"expected": ["<dd>"]
},
{"description": "dd end-tag followed by dt start-tag",
"input": [["EndTag", "dd"], ["StartTag", "dt", {}]],
"expected": ["<dt>"]
},
{"description": "dd end-tag followed by end-tag",
"input": [["EndTag", "dd"], ["EndTag", "foo", {}]],
"expected": ["</foo>"]
},
{"description": "dd end-tag at EOF",
"input": [["EndTag", "dd"]],
"expected": [""]
},
{"description": "p end-tag followed by comment",
"input": [["EndTag", "p"], ["Comment", "foo"]],
"expected": ["</p><!--foo-->"]
},
{"description": "p end-tag followed by space character",
"input": [["EndTag", "p"], ["Characters", " foo"]],
"expected": ["</p> foo"]
},
{"description": "p end-tag followed by text",
"input": [["EndTag", "p"], ["Characters", "foo"]],
"expected": ["</p>foo"]
},
{"description": "p end-tag followed by start-tag",
"input": [["EndTag", "p"], ["StartTag", "foo", {}]],
"expected": ["</p><foo>"]
},
{"description": "p end-tag followed by address start-tag",
"input": [["EndTag", "p"], ["StartTag", "address", {}]],
"expected": ["<address>"]
},
{"description": "p end-tag followed by blockquote start-tag",
"input": [["EndTag", "p"], ["StartTag", "blockquote", {}]],
"expected": ["<blockquote>"]
},
{"description": "p end-tag followed by dl start-tag",
"input": [["EndTag", "p"], ["StartTag", "dl", {}]],
"expected": ["<dl>"]
},
{"description": "p end-tag followed by fieldset start-tag",
"input": [["EndTag", "p"], ["StartTag", "fieldset", {}]],
"expected": ["<fieldset>"]
},
{"description": "p end-tag followed by form start-tag",
"input": [["EndTag", "p"], ["StartTag", "form", {}]],
"expected": ["<form>"]
},
{"description": "p end-tag followed by h1 start-tag",
"input": [["EndTag", "p"], ["StartTag", "h1", {}]],
"expected": ["<h1>"]
},
{"description": "p end-tag followed by h2 start-tag",
"input": [["EndTag", "p"], ["StartTag", "h2", {}]],
"expected": ["<h2>"]
},
{"description": "p end-tag followed by h3 start-tag",
"input": [["EndTag", "p"], ["StartTag", "h3", {}]],
"expected": ["<h3>"]
},
{"description": "p end-tag followed by h4 start-tag",
"input": [["EndTag", "p"], ["StartTag", "h4", {}]],
"expected": ["<h4>"]
},
{"description": "p end-tag followed by h5 start-tag",
"input": [["EndTag", "p"], ["StartTag", "h5", {}]],
"expected": ["<h5>"]
},
{"description": "p end-tag followed by h6 start-tag",
"input": [["EndTag", "p"], ["StartTag", "h6", {}]],
"expected": ["<h6>"]
},
{"description": "p end-tag followed by hr start-tag",
"input": [["EndTag", "p"], ["StartTag", "hr", {}]],
"expected": ["<hr>"]
},
{"description": "p end-tag followed by menu start-tag",
"input": [["EndTag", "p"], ["StartTag", "menu", {}]],
"expected": ["<menu>"]
},
{"description": "p end-tag followed by ol start-tag",
"input": [["EndTag", "p"], ["StartTag", "ol", {}]],
"expected": ["<ol>"]
},
{"description": "p end-tag followed by p start-tag",
"input": [["EndTag", "p"], ["StartTag", "p", {}]],
"expected": ["<p>"]
},
{"description": "p end-tag followed by pre start-tag",
"input": [["EndTag", "p"], ["StartTag", "pre", {}]],
"expected": ["<pre>"]
},
{"description": "p end-tag followed by table start-tag",
"input": [["EndTag", "p"], ["StartTag", "table", {}]],
"expected": ["<table>"]
},
{"description": "p end-tag followed by ul start-tag",
"input": [["EndTag", "p"], ["StartTag", "ul", {}]],
"expected": ["<ul>"]
},
{"description": "p end-tag followed by end-tag",
"input": [["EndTag", "p"], ["EndTag", "foo", {}]],
"expected": ["</foo>"]
},
{"description": "p end-tag at EOF",
"input": [["EndTag", "p"]],
"expected": [""]
},
{"description": "optgroup end-tag followed by comment",
"input": [["EndTag", "optgroup"], ["Comment", "foo"]],
"expected": ["</optgroup><!--foo-->"]
},
{"description": "optgroup end-tag followed by space character",
"input": [["EndTag", "optgroup"], ["Characters", " foo"]],
"expected": ["</optgroup> foo"]
},
{"description": "optgroup end-tag followed by text",
"input": [["EndTag", "optgroup"], ["Characters", "foo"]],
"expected": ["</optgroup>foo"]
},
{"description": "optgroup end-tag followed by start-tag",
"input": [["EndTag", "optgroup"], ["StartTag", "foo", {}]],
"expected": ["</optgroup><foo>"]
},
{"description": "optgroup end-tag followed by optgroup start-tag",
"input": [["EndTag", "optgroup"], ["StartTag", "optgroup", {}]],
"expected": ["<optgroup>"]
},
{"description": "optgroup end-tag followed by end-tag",
"input": [["EndTag", "optgroup"], ["EndTag", "foo", {}]],
"expected": ["</foo>"]
},
{"description": "optgroup end-tag at EOF",
"input": [["EndTag", "optgroup"]],
"expected": [""]
},
{"description": "option end-tag followed by comment",
"input": [["EndTag", "option"], ["Comment", "foo"]],
"expected": ["</option><!--foo-->"]
},
{"description": "option end-tag followed by space character",
"input": [["EndTag", "option"], ["Characters", " foo"]],
"expected": ["</option> foo"]
},
{"description": "option end-tag followed by text",
"input": [["EndTag", "option"], ["Characters", "foo"]],
"expected": ["</option>foo"]
},
{"description": "option end-tag followed by start-tag",
"input": [["EndTag", "option"], ["StartTag", "foo", {}]],
"expected": ["</option><foo>"]
},
{"description": "option end-tag followed by option start-tag",
"input": [["EndTag", "option"], ["StartTag", "option", {}]],
"expected": ["<option>"]
},
{"description": "option end-tag followed by end-tag",
"input": [["EndTag", "option"], ["EndTag", "foo", {}]],
"expected": ["</foo>"]
},
{"description": "option end-tag at EOF",
"input": [["EndTag", "option"]],
"expected": [""]
},
{"description": "colgroup start-tag followed by comment",
"input": [["StartTag", "colgroup", {}], ["Comment", "foo"]],
"expected": ["<colgroup><!--foo-->"]
},
{"description": "colgroup start-tag followed by space character",
"input": [["StartTag", "colgroup", {}], ["Characters", " foo"]],
"expected": ["<colgroup> foo"]
},
{"description": "colgroup start-tag followed by text",
"input": [["StartTag", "colgroup", {}], ["Characters", "foo"]],
"expected": ["<colgroup>foo"]
},
{"description": "colgroup start-tag followed by start-tag",
"input": [["StartTag", "colgroup", {}], ["StartTag", "foo", {}]],
"expected": ["<colgroup><foo>"]
},
{"description": "first colgroup in a table with a col child",
"input": [["StartTag", "table", {}], ["StartTag", "colgroup", {}], ["StartTag", "col", {}]],
"expected": ["<table><col>"]
},
{"description": "colgroup with a col child, following another colgroup",
"input": [["EndTag", "colgroup", {}], ["StartTag", "colgroup", {}], ["StartTag", "col", {}]],
"expected": ["</colgroup><col>", "<colgroup><col>"]
},
{"description": "colgroup start-tag followed by end-tag",
"input": [["StartTag", "colgroup", {}], ["EndTag", "foo", {}]],
"expected": ["<colgroup></foo>"]
},
{"description": "colgroup start-tag at EOF",
"input": [["StartTag", "colgroup", {}]],
"expected": ["<colgroup>"]
},
{"description": "colgroup end-tag followed by comment",
"input": [["EndTag", "colgroup"], ["Comment", "foo"]],
"expected": ["</colgroup><!--foo-->"]
},
{"description": "colgroup end-tag followed by space character",
"input": [["EndTag", "colgroup"], ["Characters", " foo"]],
"expected": ["</colgroup> foo"]
},
{"description": "colgroup end-tag followed by text",
"input": [["EndTag", "colgroup"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "colgroup end-tag followed by start-tag",
"input": [["EndTag", "colgroup"], ["StartTag", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "colgroup end-tag followed by end-tag",
"input": [["EndTag", "colgroup"], ["EndTag", "foo", {}]],
"expected": ["</foo>"]
},
{"description": "colgroup end-tag at EOF",
"input": [["EndTag", "colgroup"]],
"expected": [""]
},
{"description": "thead end-tag followed by comment",
"input": [["EndTag", "thead"], ["Comment", "foo"]],
"expected": ["</thead><!--foo-->"]
},
{"description": "thead end-tag followed by space character",
"input": [["EndTag", "thead"], ["Characters", " foo"]],
"expected": ["</thead> foo"]
},
{"description": "thead end-tag followed by text",
"input": [["EndTag", "thead"], ["Characters", "foo"]],
"expected": ["</thead>foo"]
},
{"description": "thead end-tag followed by start-tag",
"input": [["EndTag", "thead"], ["StartTag", "foo", {}]],
"expected": ["</thead><foo>"]
},
{"description": "thead end-tag followed by tbody start-tag",
"input": [["EndTag", "thead"], ["StartTag", "tbody", {}]],
"expected": ["<tbody>"]
},
{"description": "thead end-tag followed by tfoot start-tag",
"input": [["EndTag", "thead"], ["StartTag", "tfoot", {}]],
"expected": ["<tfoot>"]
},
{"description": "thead end-tag followed by end-tag",
"input": [["EndTag", "thead"], ["EndTag", "foo", {}]],
"expected": ["</thead></foo>"]
},
{"description": "thead end-tag at EOF",
"input": [["EndTag", "thead"]],
"expected": ["</thead>"]
},
{"description": "tbody start-tag followed by comment",
"input": [["StartTag", "tbody", {}], ["Comment", "foo"]],
"expected": ["<tbody><!--foo-->"]
},
{"description": "tbody start-tag followed by space character",
"input": [["StartTag", "tbody", {}], ["Characters", " foo"]],
"expected": ["<tbody> foo"]
},
{"description": "tbody start-tag followed by text",
"input": [["StartTag", "tbody", {}], ["Characters", "foo"]],
"expected": ["<tbody>foo"]
},
{"description": "tbody start-tag followed by start-tag",
"input": [["StartTag", "tbody", {}], ["StartTag", "foo", {}]],
"expected": ["<tbody><foo>"]
},
{"description": "first tbody in a table with a tr child",
"input": [["StartTag", "table", {}], ["StartTag", "tbody", {}], ["StartTag", "tr", {}]],
"expected": ["<table><tr>"]
},
{"description": "tbody with a tr child, following another tbody",
"input": [["EndTag", "tbody", {}], ["StartTag", "tbody", {}], ["StartTag", "tr", {}]],
"expected": ["<tbody><tr>", "</tbody><tr>"]
},
{"description": "tbody with a tr child, following a thead",
"input": [["EndTag", "thead", {}], ["StartTag", "tbody", {}], ["StartTag", "tr", {}]],
"expected": ["<tbody><tr>", "</thead><tr>"]
},
{"description": "tbody with a tr child, following a tfoot",
"input": [["EndTag", "tfoot", {}], ["StartTag", "tbody", {}], ["StartTag", "tr", {}]],
"expected": ["<tbody><tr>", "</tfoot><tr>"]
},
{"description": "tbody start-tag followed by end-tag",
"input": [["StartTag", "tbody", {}], ["EndTag", "foo", {}]],
"expected": ["<tbody></foo>"]
},
{"description": "tbody start-tag at EOF",
"input": [["StartTag", "tbody", {}]],
"expected": ["<tbody>"]
},
{"description": "tbody end-tag followed by comment",
"input": [["EndTag", "tbody"], ["Comment", "foo"]],
"expected": ["</tbody><!--foo-->"]
},
{"description": "tbody end-tag followed by space character",
"input": [["EndTag", "tbody"], ["Characters", " foo"]],
"expected": ["</tbody> foo"]
},
{"description": "tbody end-tag followed by text",
"input": [["EndTag", "tbody"], ["Characters", "foo"]],
"expected": ["</tbody>foo"]
},
{"description": "tbody end-tag followed by start-tag",
"input": [["EndTag", "tbody"], ["StartTag", "foo", {}]],
"expected": ["</tbody><foo>"]
},
{"description": "tbody end-tag followed by tbody start-tag",
"input": [["EndTag", "tbody"], ["StartTag", "tbody", {}]],
"expected": ["<tbody>", "</tbody>"]
},
{"description": "tbody end-tag followed by tfoot start-tag",
"input": [["EndTag", "tbody"], ["StartTag", "tfoot", {}]],
"expected": ["<tfoot>"]
},
{"description": "tbody end-tag followed by end-tag",
"input": [["EndTag", "tbody"], ["EndTag", "foo", {}]],
"expected": ["</foo>"]
},
{"description": "tbody end-tag at EOF",
"input": [["EndTag", "tbody"]],
"expected": [""]
},
{"description": "tfoot end-tag followed by comment",
"input": [["EndTag", "tfoot"], ["Comment", "foo"]],
"expected": ["</tfoot><!--foo-->"]
},
{"description": "tfoot end-tag followed by space character",
"input": [["EndTag", "tfoot"], ["Characters", " foo"]],
"expected": ["</tfoot> foo"]
},
{"description": "tfoot end-tag followed by text",
"input": [["EndTag", "tfoot"], ["Characters", "foo"]],
"expected": ["</tfoot>foo"]
},
{"description": "tfoot end-tag followed by start-tag",
"input": [["EndTag", "tfoot"], ["StartTag", "foo", {}]],
"expected": ["</tfoot><foo>"]
},
{"description": "tfoot end-tag followed by tbody start-tag",
"input": [["EndTag", "tfoot"], ["StartTag", "tbody", {}]],
"expected": ["<tbody>", "</tfoot>"]
},
{"description": "tfoot end-tag followed by end-tag",
"input": [["EndTag", "tfoot"], ["EndTag", "foo", {}]],
"expected": ["</foo>"]
},
{"description": "tfoot end-tag at EOF",
"input": [["EndTag", "tfoot"]],
"expected": [""]
},
{"description": "tr end-tag followed by comment",
"input": [["EndTag", "tr"], ["Comment", "foo"]],
"expected": ["</tr><!--foo-->"]
},
{"description": "tr end-tag followed by space character",
"input": [["EndTag", "tr"], ["Characters", " foo"]],
"expected": ["</tr> foo"]
},
{"description": "tr end-tag followed by text",
"input": [["EndTag", "tr"], ["Characters", "foo"]],
"expected": ["</tr>foo"]
},
{"description": "tr end-tag followed by start-tag",
"input": [["EndTag", "tr"], ["StartTag", "foo", {}]],
"expected": ["</tr><foo>"]
},
{"description": "tr end-tag followed by tr start-tag",
"input": [["EndTag", "tr"], ["StartTag", "tr", {}]],
"expected": ["<tr>", "</tr>"]
},
{"description": "tr end-tag followed by end-tag",
"input": [["EndTag", "tr"], ["EndTag", "foo", {}]],
"expected": ["</foo>"]
},
{"description": "tr end-tag at EOF",
"input": [["EndTag", "tr"]],
"expected": [""]
},
{"description": "td end-tag followed by comment",
"input": [["EndTag", "td"], ["Comment", "foo"]],
"expected": ["</td><!--foo-->"]
},
{"description": "td end-tag followed by space character",
"input": [["EndTag", "td"], ["Characters", " foo"]],
"expected": ["</td> foo"]
},
{"description": "td end-tag followed by text",
"input": [["EndTag", "td"], ["Characters", "foo"]],
"expected": ["</td>foo"]
},
{"description": "td end-tag followed by start-tag",
"input": [["EndTag", "td"], ["StartTag", "foo", {}]],
"expected": ["</td><foo>"]
},
{"description": "td end-tag followed by td start-tag",
"input": [["EndTag", "td"], ["StartTag", "td", {}]],
"expected": ["<td>", "</td>"]
},
{"description": "td end-tag followed by th start-tag",
"input": [["EndTag", "td"], ["StartTag", "th", {}]],
"expected": ["<th>", "</td>"]
},
{"description": "td end-tag followed by end-tag",
"input": [["EndTag", "td"], ["EndTag", "foo", {}]],
"expected": ["</foo>"]
},
{"description": "td end-tag at EOF",
"input": [["EndTag", "td"]],
"expected": [""]
},
{"description": "th end-tag followed by comment",
"input": [["EndTag", "th"], ["Comment", "foo"]],
"expected": ["</th><!--foo-->"]
},
{"description": "th end-tag followed by space character",
"input": [["EndTag", "th"], ["Characters", " foo"]],
"expected": ["</th> foo"]
},
{"description": "th end-tag followed by text",
"input": [["EndTag", "th"], ["Characters", "foo"]],
"expected": ["</th>foo"]
},
{"description": "th end-tag followed by start-tag",
"input": [["EndTag", "th"], ["StartTag", "foo", {}]],
"expected": ["</th><foo>"]
},
{"description": "th end-tag followed by th start-tag",
"input": [["EndTag", "th"], ["StartTag", "th", {}]],
"expected": ["<th>", "</th>"]
},
{"description": "th end-tag followed by td start-tag",
"input": [["EndTag", "th"], ["StartTag", "td", {}]],
"expected": ["<td>", "</th>"]
},
{"description": "th end-tag followed by end-tag",
"input": [["EndTag", "th"], ["EndTag", "foo", {}]],
"expected": ["</foo>"]
},
{"description": "th end-tag at EOF",
"input": [["EndTag", "th"]],
"expected": [""]
}
]}

View file

@ -0,0 +1,45 @@
{"tests":[
{"description": "quote_char=\"'\"",
"options": {"quote_char": "'"},
"input": [["StartTag", "span", {"title": "test 'with' quote_char"}]],
"expected": ["<span title='test &#39;with&#39; quote_char'>"]
},
{"description": "quote_attr_values=true",
"options": {"quote_attr_values": true},
"input": [["StartTag", "button", {"disabled": "disabled"}]],
"expected": ["<button disabled>"]
},
{"description": "quote_attr_values=true with irrelevant",
"options": {"quote_attr_values": true},
"input": [["StartTag", "div", {"irrelevant": "irrelevant"}]],
"expected": ["<div irrelevant>"]
},
{"description": "use_trailing_solidus=true with void element",
"options": {"use_trailing_solidus": true},
"input": [["EmptyTag", "img", {}]],
"expected": ["<img />"]
},
{"description": "use_trailing_solidus=true with non-void element",
"options": {"use_trailing_solidus": true},
"input": [["StartTag", "div", {}]],
"expected": ["<div>"]
},
{"description": "minimize_boolean_attributes=false",
"options": {"minimize_boolean_attributes": false},
"input": [["StartTag", "div", {"irrelevant": "irrelevant"}]],
"expected": ["<div irrelevant=irrelevant>"]
},
{"description": "minimize_boolean_attributes=false with empty value",
"options": {"minimize_boolean_attributes": false},
"input": [["StartTag", "div", {"irrelevant": ""}]],
"expected": ["<div irrelevant=\"\">"]
}
]}

View file

@ -0,0 +1,51 @@
{"tests": [
{"description": "bare text with leading spaces",
"options": {"strip_whitespace": true},
"input": [["Characters", "\t\r\n\u000B\u000C foo"]],
"expected": ["foo"]
},
{"description": "bare text with trailing spaces",
"options": {"strip_whitespace": true},
"input": [["Characters", "foo \t\r\n\u000B\u000C"]],
"expected": ["foo"]
},
{"description": "bare text with inner spaces",
"options": {"strip_whitespace": true},
"input": [["Characters", "foo \t\r\n\u000B\u000C bar"]],
"expected": ["foo bar"]
},
{"description": "text within <pre>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "pre", {}], ["Characters", "\t\r\n\u000B\u000C foo \t\r\n\u000B\u000C bar \t\r\n\u000B\u000C"], ["EndTag", "pre"]],
"expected": ["<pre>\t\r\n\u000B\u000C foo \t\r\n\u000B\u000C bar \t\r\n\u000B\u000C</pre>"]
},
{"description": "text within <pre>, with inner markup",
"options": {"strip_whitespace": true},
"input": [["StartTag", "pre", {}], ["Characters", "\t\r\n\u000B\u000C fo"], ["StartTag", "span", {}], ["Characters", "o \t\r\n\u000B\u000C b"], ["EndTag", "span"], ["Characters", "ar \t\r\n\u000B\u000C"], ["EndTag", "pre"]],
"expected": ["<pre>\t\r\n\u000B\u000C fo<span>o \t\r\n\u000B\u000C b</span>ar \t\r\n\u000B\u000C</pre>"]
},
{"description": "text within <textarea>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "textarea", {}], ["Characters", "\t\r\n\u000B\u000C foo \t\r\n\u000B\u000C bar \t\r\n\u000B\u000C"], ["EndTag", "textarea"]],
"expected": ["<textarea>\t\r\n\u000B\u000C foo \t\r\n\u000B\u000C bar \t\r\n\u000B\u000C</textarea>"]
},
{"description": "text within <script>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "script", {}], ["Characters", "\t\r\n\u000B\u000C foo \t\r\n\u000B\u000C bar \t\r\n\u000B\u000C"], ["EndTag", "script"]],
"expected": ["<script>\t\r\n\u000B\u000C foo \t\r\n\u000B\u000C bar \t\r\n\u000B\u000C</script>"]
},
{"description": "text within <style>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "style", {}], ["Characters", "\t\r\n\u000B\u000C foo \t\r\n\u000B\u000C bar \t\r\n\u000B\u000C"], ["EndTag", "style"]],
"expected": ["<style>\t\r\n\u000B\u000C foo \t\r\n\u000B\u000C bar \t\r\n\u000B\u000C</style>"]
}
]}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,36 @@
{"tests": [
{"description":"PLAINTEXT content model flag",
"contentModelFlags":["PLAINTEXT"],
"input":"<head>&body;",
"output":[["Character", "<head>&body;"]]},
{"description":"End tag closing RCDATA or CDATA",
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"bar",
"input":"foo</bar>",
"output":[["Character", "foo"], ["EndTag", "bar"]]},
{"description":"End tag with incorrect name in RCDATA or CDATA",
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"baz",
"input":"</foo>bar</baz>",
"output":["ParseError", ["Character", "</foo>bar"], ["EndTag", "baz"]]},
{"description":"End tag closing RCDATA or CDATA, switching back to PCDATA",
"contentModelFlags":["RCDATA", "CDATA"],
"lastStartTag":"bar",
"input":"foo</bar></baz>",
"output":[["Character", "foo"], ["EndTag", "bar"], ["EndTag", "baz"]]},
{"description":"CDATA w/ something looking like an entity",
"contentModelFlags":["CDATA"],
"input":"&foo;",
"output":[["Character", "&foo;"]]},
{"description":"RCDATA w/ an entity",
"contentModelFlags":["RCDATA"],
"input":"&lt;",
"output":[["Character", "<"]]}
]}

View file

@ -0,0 +1,136 @@
{"tests": [
{"description":"Correct Doctype lowercase",
"input":"<!DOCTYPE html>",
"output":[["DOCTYPE", "HTML", false]]},
{"description":"Correct Doctype uppercase",
"input":"<!DOCTYPE HTML>",
"output":[["DOCTYPE", "HTML", false]]},
{"description":"Correct Doctype mixed case",
"input":"<!DOCTYPE HtMl>",
"output":[["DOCTYPE", "HTML", false]]},
{"description":"Truncated doctype start",
"input":"<!DOC>",
"output":["ParseError", ["Comment", "DOC"]]},
{"description":"Doctype in error",
"input":"<!DOCTYPE foo>",
"output":[["DOCTYPE", "FOO", true]]},
{"description":"Single Start Tag",
"input":"<h>",
"output":[["StartTag", "h", {}]]},
{"description":"Empty end tag",
"input":"</>",
"output":["ParseError"]},
{"description":"Empty start tag",
"input":"<>",
"output":["ParseError", ["Character", "<>"]]},
{"description":"Start Tag w/attribute",
"input":"<h a='b'>",
"output":[["StartTag", "h", {"a":"b"}]]},
{"description":"Start Tag w/attribute no quotes",
"input":"<h a=b>",
"output":[["StartTag", "h", {"a":"b"}]]},
{"description":"Start/End Tag",
"input":"<h></h>",
"output":[["StartTag", "h", {}], ["EndTag", "h"]]},
{"description":"Two unclosed start tags",
"input":"<p>One<p>Two",
"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
{"description":"End Tag w/attribute",
"input":"<h></h a='b'>",
"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]},
{"description":"Multiple atts",
"input":"<h a='b' c='d'>",
"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
{"description":"Multiple atts no space",
"input":"<h a='b'c='d'>",
"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
{"description":"Repeated attr",
"input":"<h a='b' a='d'>",
"output":["ParseError", ["StartTag", "h", {"a":"b"}]]},
{"description":"Simple comment",
"input":"<!--comment-->",
"output":[["Comment", "comment"]]},
{"description":"Comment, Central dash no space",
"input":"<!----->",
"output":["ParseError", ["Comment", "-"]]},
{"description":"Comment, two central dashes",
"input":"<!-- --comment -->",
"output":["ParseError", ["Comment", " --comment "]]},
{"description":"Unfinished comment",
"input":"<!--comment",
"output":["ParseError", ["Comment", "comment"]]},
{"description":"Start of a comment",
"input":"<!-",
"output":["ParseError", ["Comment", "-"]]},
{"description":"Ampersand only",
"input":"&",
"output":["ParseError", ["Character", "&"]]},
{"description":"Unfinished entity",
"input":"&f",
"output":["ParseError", ["Character", "&"], ["Character", "f"]]},
{"description":"Ampersand, number sign",
"input":"&#",
"output":["ParseError", ["Character", "&"], ["Character", "#"]]},
{"description":"Unfinished numeric entity",
"input":"&#x",
"output":["ParseError", ["Character", "&#x"]]},
{"description":"Entity with trailing semicolon (1)",
"input":"I'm &not;it",
"output":[["Character","I'm ¬it"]]},
{"description":"Entity with trailing semicolon (2)",
"input":"I'm &notin;",
"output":[["Character","I'm ∉"]]},
{"description":"Entity without trailing semicolon (1)",
"input":"I'm &notit",
"output":[["Character","I'm "], "ParseError", ["Character", "¬"],
["Character", "it"]]},
{"description":"Entity without trailing semicolon (2)",
"input":"I'm &notin",
"output":[["Character","I'm "], "ParseError", ["Character", "∉"]]},
{"description":"Partial entity match at end of file",
"input":"I'm &no",
"output":[["Character","I'm "], "ParseError", ["Character", "&no"]]},
{"description":"ASCII decimal entity",
"input":"&#0036;",
"output":[["Character","$"]]},
{"description":"ASCII hexadecimal entity",
"input":"&#x3f;",
"output":[["Character","?"]]},
{"description":"Hexadecimal entity in attribute",
"input":"<h a='&#x3f;'></h>",
"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]}
]}

View file

@ -0,0 +1,105 @@
{"tests": [
{"description":"Doctype without a name",
"input":"<!DOCTYPE>",
"output":["ParseError", "ParseError", ["DOCTYPE", "", true]]},
{"description":"Correct doctype without a space before name",
"input":"<!DOCTYPEhtml>",
"output":["ParseError", ["DOCTYPE", "HTML", false]]},
{"description":"Incorrect doctype without a space before name",
"input":"<!DOCTYPEfoo>",
"output":["ParseError", ["DOCTYPE", "FOO", true]]},
{"description":"Bogus doctype",
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":["ParseError", ["DOCTYPE", "HTML", true]]},
{"description":"Incomplete doctype",
"input":"<!DOCTYPE html ",
"output":["ParseError", ["DOCTYPE", "HTML", true]]},
{"description":"Numeric entity representing the NUL character",
"input":"&#0000;",
"output":[["Character", "\uFFFD"]]},
{"description":"Hexadecimal entity representing the NUL character",
"input":"&#x0000;",
"output":[["Character", "\uFFFD"]]},
{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
"input":"&#2225222;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
"input":"&#x1010FFFF;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Numeric entity representing a Windows-1252 'codepoint'",
"input":"&#137;",
"output":[["Character", "\u2030"]]},
{"description":"Hexadecimal entity representing a Windows-1252 'codepoint'",
"input":"&#x89;",
"output":[["Character", "\u2030"]]},
{"description":"Hexadecimal entity with mixed uppercase and lowercase",
"input":"&#xaBcD;",
"output":[["Character", "\uABCD"]]},
{"description":"Entity without a name",
"input":"&;",
"output":["ParseError", ["Character", "&;"]]},
{"description":"Unescaped ampersand in attribute value",
"input":"<h a='&'>",
"output":["ParseError", ["StartTag", "h", { "a":"&" }]]},
{"description":"StartTag containing <",
"input":"<a<b>",
"output":["ParseError", ["StartTag", "a", { }], ["StartTag", "b", { }]]},
{"description":"Non-void element containing trailing /",
"input":"<h/>",
"output":["ParseError", ["StartTag", "h", { }]]},
{"description":"Void element with permitted slash",
"input":"<br/>",
"output":[["StartTag", "br", { }]]},
{"description":"StartTag containing /",
"input":"<h/a='b'>",
"output":["ParseError", ["StartTag", "h", { "a":"b" }]]},
{"description":"Double-quoted attribute value",
"input":"<h a=\"b\">",
"output":[["StartTag", "h", { "a":"b" }]]},
{"description":"Unescaped </",
"input":"</",
"output":["ParseError", ["Character", "</"]]},
{"description":"Illegal end tag name",
"input":"</1>",
"output":["ParseError", ["Comment", "1"]]},
{"description":"Simili processing instruction",
"input":"<?namespace>",
"output":["ParseError", ["Comment", "?namespace"]]},
{"description":"A bogus comment stops at >, even if preceded by two dashes",
"input":"<?foo-->",
"output":["ParseError", ["Comment", "?foo--"]]},
{"description":"Unescaped <",
"input":"foo < bar",
"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},
{"description":"Null Byte Replacement",
"input":"\u0000",
"output":[["Character", "\ufffd"]]}
]}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,782 @@
#data
<!doctype html>Test
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| "Test"
#data
<textarea>test</div>test
#errors
10: missing document type declaration.
17: unescaped '</' in CDATA or RCDATA block.
25: unexpected end of file while parsing CDATA section for element textarea.
#document
| <html>
| <head>
| <body>
| <textarea>
| "test</div>test"
#data
<table><td>
#errors
7: missing document type declaration.
11: required tr element start tag implied by unexpected td element start tag.
12: unexpected end of file implied table element end tag.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><td>test</tbody></table>
#errors
missing document type declaration
Unexpected end of file
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "test"
#data
<frame>test
#errors
missing document type declaration
frame element can't occur here
#document
| <html>
| <head>
| <body>
| "test"
#data
<!doctype html><frameset>test
#errors
frameset can't contain text
Unexpected end of file
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <frameset>
#data
<!doctype html><frameset><!doctype html>
#errors
document type declaration can only occur at the start of a document
Expected end tag </frameset>
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <frameset>
#data
<!doctype html><font><p><b>test</font>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <font>
| <p>
| <font>
| <b>
| "test"
#data
<!DOCTYPE htmL><dt><div><dd>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <dt>
| <div>
| <dd>
#data
<script></x
#errors
no document type
</ in script
Unexpected end of file. Expected </script> end tag.
#document
| <html>
| <head>
| <script>
| "</x"
| <body>
#data
<table><plaintext><td>
#errors
no document type
<plaintext> directly inside table
Characters inside table.
Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <plaintext>
| "<td>"
| <table>
#data
<plaintext></plaintext>
#errors
No DOCTYPE seen.
Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
#data
<!doctype html><table><tr>TEST
#errors
TEST can't occur in <tr>
Unexpected end of file.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| "TEST"
| <table>
| <tbody>
| <tr>
#data
<!doctype html><body t1=1><body t2=2><body t3=3 t4=4>
#errors
Unexpected start tag "body"
Unexpected start tag "body"
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| t4="4"
| t2="2"
| t3="3"
| t1="1"
#data
</b test
#errors
Unexpected EOF in attribute
Unexpected attribute in end tag.
No doctype.
Unexpected end tag.
#document
| <html>
| <head>
| <body>
#data
<!doctype HtML></b test<b &=&amp>X
#errors
Unexpected < in attribute
End tag contains attributes.
Unexpected end tag.
Named entity didn't end with ;
Unexpected EOF. Missing closing tag.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <b>
| &="&"
| "X"
#data
<!doctypehtml><scrIPt type=text/x-foobar;baz>X</SCRipt
#errors
No space after literal DOCTYPE.
Unexpected EOF in (end) tag name
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <script>
| type="text/x-foobar;baz"
| "X"
| <body>
#data
&
#errors
No doctype.
Unfinished entity.
#document
| <html>
| <head>
| <body>
| "&"
#data
&#
#errors
No doctype.
Unfinished numeric entity.
#document
| <html>
| <head>
| <body>
| "&#"
#data
&#X
#errors
No doctype.
Unfinished hexadecimal entity.
#document
| <html>
| <head>
| <body>
| "&#X"
#data
&#x
#errors
No doctype.
Unfinished hexadecimal entity.
#document
| <html>
| <head>
| <body>
| "&#x"
#data
&#45
#errors
No doctype.
Numeric entity didn't end with ;
#document
| <html>
| <head>
| <body>
| "-"
#data
&x-test
#errors
No doctype.
Unfinished named entity.
#document
| <html>
| <head>
| <body>
| "&x-test"
#data
<!doctypehtml><p><li>
#errors
No space after literal DOCTYPE.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <p>
| <li>
#data
<!doctypehtml><p><dt>
#errors
No space after literal DOCTYPE.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <p>
| <dt>
#data
<!doctypehtml><p><dd>
#errors
No space after literal DOCTYPE.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <p>
| <dd>
#data
<!doctypehtml><p><form>
#errors
No space after literal DOCTYPE.
Unexpected EOF.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <p>
| <form>
#data
<!doctype html><p><b><i><u></p> <p>X
#errors
Unexpected end tag </p>.
Unexpected EOF. Missing closing tags.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <p>
| <b>
| <i>
| <u>
| " "
| <p>
| <b>
| <i>
| <u>
| "X"
#data
<!doctype html><p></P>X
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <p>
| "X"
#data
&AMP
#errors
No doctype.
No closing ; for the entity.
#document
| <html>
| <head>
| <body>
| "&"
#data
&AMp;
#errors
No doctype.
Invalid entity.
#document
| <html>
| <head>
| <body>
| "&AMp;"
#data
<!doctype html><html><head></head><body><thisISasillyTESTelementNameToMakeSureCrazyTagNamesArePARSEDcorrectLY>
#errors
Unexpected end of file.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <thisisasillytestelementnametomakesurecrazytagnamesareparsedcorrectly>
#data
<!doctype html>X</body>X
#errors
Unexpected non-space characters in the after body phase.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| "XX"
#data
<!doctype html><!-- X
#errors
Unexpected end of file in comment.
#document
| <!DOCTYPE HTML>
| <!-- X -->
| <html>
| <head>
| <body>
#data
<!doctype html><table><caption>test TEST</caption><td>test
#errors
Unexpected <td> in table body phase.
Unexpected end of file.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <table>
| <caption>
| "test TEST"
| <tbody>
| <tr>
| <td>
| "test"
#data
<!doctype html><select><option><optgroup>
#errors
Unexpected end of file. Missing closing tags.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <select>
| <option>
| <optgroup>
#data
<!doctype html><select><optgroup><option></optgroup><option><select><option>
#errors
Unexpected start tag <select> in <select>.
Unexpected start tag <option>.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <select>
| <optgroup>
| <option>
| <option>
#data
<!doctype html><select><optgroup><option><optgroup>
#errors
Unexpected end of file. Missing closing tags.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <select>
| <optgroup>
| <option>
| <optgroup>
#data
<!doctype html><font><input><input></font>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <font>
| <input>
| <input>
#data
<!DoctypE html><!-- XXX - XXX -->
#errors
#document
| <!DOCTYPE HTML>
| <!-- XXX - XXX -->
| <html>
| <head>
| <body>
#data
<!DoctypE html><!-- XXX - XXX
#errors
Unexpected EOF in comment.
#document
| <!DOCTYPE HTML>
| <!-- XXX - XXX -->
| <html>
| <head>
| <body>
#data
<!DoctypE html><!-- XXX - XXX - XXX -->
#errors
#document
| <!DOCTYPE HTML>
| <!-- XXX - XXX - XXX -->
| <html>
| <head>
| <body>
#data
<isindex test=x name=x>
#errors
No doctype
<isindex> is not ok!
#document
| <html>
| <head>
| <body>
| <form>
| <hr>
| <p>
| <label>
| "This is a searchable index. Insert your search keywords here:"
| <input>
| test="x"
| name="isindex"
| <hr>
#data
test
test
#errors
No doctype
#document
| <html>
| <head>
| <body>
| "test
test"
#data
<p><b><i><u></p>
<p>X
#errors
No doctype
Unexpected end tag p.
Unexpected EOF.
#document
| <html>
| <head>
| <body>
| <p>
| <b>
| <i>
| <u>
| "
"
| <p>
| <b>
| <i>
| <u>
| "X"
#data
<!doctype html><body><title>test</body></title>
#errors
Unexpected start tag that belongs in the head.
Expected closing tag after </.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <title>
| "test</body>"
| <body>
#data
<!doctype html><body><title>X</title><meta name=z><link rel=foo><style>
x { content:"</style" } </style>
#errors
Unexpected start tag that belongs in head.
Unexpected start tag that belongs in head.
Unexpected start tag that belongs in head.
Expected closing tag after </.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <title>
| "X"
| <meta>
| name="z"
| <link>
| rel="foo"
| <body>
| <style>
| "
x { content:"</style" } "
#data
<!doctype html><select><optgroup></optgroup></select>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <select>
| <optgroup>
#data
#errors
No doctype.
#document
| "
"
| <html>
| <head>
| <body>
#data
<!doctype html> <html>
#errors
#document
| <!DOCTYPE HTML>
| " "
| <html>
| <head>
| <body>
#data
<!doctype html><script>
</script> <title>x</title> </head>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <script>
| "
"
| " "
| <title>
| "x"
| " "
| <body>
#data
<!doctype html><html><body><html id=x>
#errors
duplicate html start tag
#document
| <!DOCTYPE HTML>
| <html>
| id="x"
| <head>
| <body>
#data
<!doctype html>X</body><html id="x">
#errors
Unexpected html start tag in the after body phase.
html needs to be the first start tag.
#document
| <!DOCTYPE HTML>
| <html>
| id="x"
| <head>
| <body>
| "X"
#data
<!doctype html><head><html id=x>
#errors
html start tag too late
#document
| <!DOCTYPE HTML>
| <html>
| id="x"
| <head>
| <body>
#data
<!doctype html>X</html>X
#errors
Unexpected non-space characters. Expected end of file.
Unexpected non-space characters in after body phase. Expected end of file.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| "XX"
#data
<!doctype html>X</html>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| "X "
#data
<!doctype html>X</html><p>X
#errors
Unexpected start tag <p> in trailing end phase.
Unexpected start tag <p> in after body phase.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| "X"
| <p>
| "X"
#data
<!doctype html>X<p/x/y/z>
#errors
Solidus (/) incorrectly placed.
Solidus (/) incorrectly placed.
Solidus (/) incorrectly placed.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| "X"
| <p>
| y=""
| x=""
| z=""
#data
<!doctype html><!--x--
#errors
Unexpected end of file in comment.
#document
| <!DOCTYPE HTML>
| <!-- x -->
| <html>
| <head>
| <body>
#data
<!doctype html><table><tr><td></p></table>
#errors
Unexpected </p> end tag.
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>

View file

@ -0,0 +1,210 @@
#data
<head></head><style></style>
#errors
No DOCTYPE
<style> in after-head mode
#document
| <html>
| <head>
| <style>
| <body>
#data
<head></head><script></script>
#errors
No DOCTYPE
<script> in after-head mode
#document
| <html>
| <head>
| <script>
| <body>
#data
<head></head><!-- --><style></style><!-- --><script></script>
#errors
No DOCTYPE
<style> in after-head mode
#document
| <html>
| <head>
| <style>
| <script>
| <!-- -->
| <!-- -->
| <body>
#data
<head></head><!-- -->x<style></style><!-- --><script></script>
#errors
No DOCTYPE
#document
| <html>
| <head>
| <!-- -->
| <body>
| "x"
| <style>
| <!-- -->
| <script>
#data
<!DOCTYPE html><html><head></head><body><pre>
</pre></body></html>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <pre>
#data
<!DOCTYPE html><html><head></head><body><pre>
foo</pre></body></html>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <pre>
| "foo"
#data
<!DOCTYPE html><html><head></head><body><pre>
foo
</pre></body></html>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <pre>
| "foo
"
#data
<!DOCTYPE html><html><head></head><body><pre>x</pre><span>
</span></body></html>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <pre>
| "x"
| <span>
| "
"
#data
<!DOCTYPE html><html><head></head><body><pre>x
y</pre></body></html>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <pre>
| "x
y"
#data
<!DOCTYPE html><html><head></head><body><pre>x<div>
y</pre></body></html>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <pre>
| "x"
| <div>
| "
| y"
#data
<!DOCTYPE html><HTML><META><HEAD></HEAD></HTML>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <meta>
| <body>
#data
<!DOCTYPE html><HTML><HEAD><head></HEAD></HTML>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
#data
<textarea>foo<span>bar</span><i>baz
#errors
#document
| <html>
| <head>
| <body>
| <textarea>
| "foo<span>bar</span><i>baz"
#data
<title>foo<span>bar</em><i>baz
#errors
#document
| <html>
| <head>
| <title>
| "foo<span>bar</em><i>baz"
| <body>
#data
<!DOCTYPE html><textarea>
</textarea>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <textarea>
#data
<!DOCTYPE html><textarea>
foo</textarea>
#errors
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <textarea>
| "foo"
#data
<!DOCTYPE html><html><head></head><body><ul><li><div><p><li></ul></body></html>
#errors
Missing end tag (div)
#document
| <!DOCTYPE HTML>
| <html>
| <head>
| <body>
| <ul>
| <li>
| <div>
| <p>
| <li>

View file

@ -0,0 +1,44 @@
#data
direct div content
#errors
#document-fragment div
| "direct div content"
#data
direct textarea content
#errors
#document-fragment textarea
| "direct textarea content"
#data
textarea content with <em>pseudo</em> <foo>markup
#errors
#document-fragment textarea
| "textarea content with <em>pseudo</em> <foo>markup"
#data
this is &#x0043;DATA inside a <style> element
#errors
#document-fragment style
| "this is &#x0043;DATA inside a <style> element"
#data
</plaintext>
#errors
#document-fragment plaintext
| "</plaintext>"
#data
setting html's innerHTML
#errors
#document-fragment html
| <head>
| <body>
| "setting html's innerHTML"
#data
<title>setting head's innerHTML</title>
#errors
#document-fragment head
| <title>
| "setting head's innerHTML"

View file

@ -1,17 +1,24 @@
require 'test/unit'
HTML5LIB_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__))))
HTML5LIB_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__))))
if File.exists?(File.join(HTML5LIB_BASE, 'testdata'))
TESTDATA_DIR = File.join(HTML5LIB_BASE, 'testdata')
else
TESTDATA_DIR = File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), 'testdata')
end
$:.unshift File.join(File.dirname(File.dirname(__FILE__)),'lib')
$:.unshift File.dirname(__FILE__)
def html5lib_test_files(subdirectory)
Dir[File.join(HTML5LIB_BASE, 'tests', subdirectory, '*.*')]
Dir[File.join(TESTDATA_DIR, subdirectory, '*.*')]
end
begin
require 'jsonx'
require 'rubygems'
require 'json'
rescue LoadError
class JSON
def self.parse json

View file

@ -9,13 +9,12 @@ class Html5EncodingTestCase < Test::Unit::TestCase
require 'UniversalDetector'
def test_chardet
File.open(File.join(HTML5LIB_BASE, 'tests', 'encoding', 'chardet', 'test_big5.txt')) do |file|
stream = HTML5lib::HTMLInputStream.new(file, :chardet => true)
assert_equal 'big5', stream.char_encoding.downcase
end
file = File.open(File.join(TESTDATA_DIR, 'encoding', 'chardet', 'test_big5.txt'), 'r')
stream = HTML5lib::HTMLInputStream.new(file, :chardet => true)
assert_equal 'big5', stream.char_encoding.downcase
rescue LoadError
puts "chardet not found, skipping chardet tests"
end
rescue LoadError
puts "chardet not found, skipping chardet tests"
end
html5lib_test_files('encoding').each do |test_file|

View file

@ -54,7 +54,7 @@ class Html5ParserTestCase < Test::Unit::TestCase
actual_errors = parser.errors.map do |(line, col), message|
'Line: %i Col: %i %s' % [line, col, message]
end
assert_equal parser.errors.length, expected_errors.length, [
assert_equal expected_errors.length, parser.errors.length, [
'Expected errors:', expected_errors.join("\n"),
'Actual errors:', actual_errors.join("\n")
].join("\n")

View file

@ -31,14 +31,14 @@ class SanitizeTest < Test::Unit::TestCase
:omit_optional_tags => false,
:inject_meta_charset => false,
:sanitize => true}).gsub(/^<div xmlns='http:\/\/www.w3.org\/1999\/xhtml'>(.*)<\/div>$/, '\1')
rescue
return "Ill-formed XHTML!"
rescue
return "Ill-formed XHTML!"
end
def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
assert_equal htmloutput, sanitize_html(input)
assert_equal xhtmloutput, sanitize_xhtml(input)
assert_equal rexmloutput, sanitize_rexml(input)
assert_equal htmloutput, sanitize_html(input)
assert_equal xhtmloutput, sanitize_xhtml(input)
assert_equal rexmloutput, sanitize_rexml(input)
end
HTMLSanitizer::ALLOWED_ELEMENTS.each do |tag_name|
@ -113,191 +113,6 @@ class SanitizeTest < Test::Unit::TestCase
end
end
def test_should_allow_anchors
input = "<a href='foo' onclick='bar'><script>baz</script></a>"
output = "<a href='foo'>&lt;script&gt;baz&lt;/script&gt;</a>"
check_sanitization(input, output, output, output)
end
# RFC 3986, sec 4.2
def test_allow_colons_in_path_component
input = "<a href=\"./this:that\">foo</a>"
output = "<a href='./this:that'>foo</a>"
check_sanitization(input, output, output, output)
end
%w(src width height alt).each do |img_attr|
define_method "test_should_allow_image_#{img_attr}_attribute" do
input = "<img #{img_attr}='foo' onclick='bar' />"
output = "<img #{img_attr}='foo'/>"
rexmloutput = "<img #{img_attr}='foo' />"
check_sanitization(input, output, output, rexmloutput)
end
end
def test_should_handle_non_html
input = 'abc'
output = 'abc'
check_sanitization(input, output, output, output)
end
def test_should_handle_blank_text
input = ''
output = ''
check_sanitization(input, output, output, output)
end
[%w(img src), %w(a href)].each do |(tag, attr)|
close = VOID_ELEMENTS.include?(tag) ? "/>boo" : ">boo</#{tag}>"
xclose = VOID_ELEMENTS.include?(tag) ? " />" : ">boo</#{tag}>"
input = %(<#{tag} #{attr}="javascript:XSS" title="1">boo</#{tag}>)
output = %(<#{tag} title='1'#{close})
rexmloutput = %(<#{tag} title='1'#{xclose})
define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do
check_sanitization(input, output, output, rexmloutput)
end
define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols_and_whitespace" do
input = %(<#{tag} #{attr}=" javascript:XSS" title="1">boo</#{tag}>)
output = %(<#{tag} title='1'#{close})
rexmloutput = %(<#{tag} title='1'#{xclose})
check_sanitization(input, output, output, rexmloutput)
end
end
[%(<img src="javascript:alert('XSS');" />),
%(<img src=javascript:alert('XSS') />),
%(<img src="JaVaScRiPt:alert('XSS')" />),
%(<img src='javascript:alert(&quot;XSS&quot;)' />),
%(<img src='javascript:alert(String.fromCharCode(88,83,83))' />),
%(<img src='&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;' />),
%(<img src='&#0000106;&#0000097;&#0000118;&#0000097;&#0000115;&#0000099;&#0000114;&#0000105;&#0000112;&#0000116;&#0000058;&#0000097;&#0000108;&#0000101;&#0000114;&#0000116;&#0000040;&#0000039;&#0000088;&#0000083;&#0000083;&#0000039;&#0000041' />),
%(<img src='&#x6A;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3A;&#x61;&#x6C;&#x65;&#x72;&#x74;&#x28;&#x27;&#x58;&#x53;&#x53;&#x27;&#x29' />),
%(<img src="jav\tascript:alert('XSS');" />),
%(<img src="jav&#x09;ascript:alert('XSS');" />),
%(<img src="jav&#x0A;ascript:alert('XSS');" />),
%(<img src="jav&#x0D;ascript:alert('XSS');" />),
%(<img src=" &#14; javascript:alert('XSS');" />),
%(<img src="&#x20;javascript:alert('XSS');" />),
%(<img src="&#xA0;javascript:alert('XSS');" />)].each_with_index do |img_hack, i|
define_method "test_should_not_fall_for_xss_image_hack_#{i}" do
output = "<img/>"
rexmloutput = "<img />"
rexmloutput = "Ill-formed XHTML!" if i == 1
check_sanitization(img_hack, output, output, rexmloutput)
end
end
def test_should_sanitize_tag_broken_up_by_null
input = %(<scr\0ipt>alert(\"XSS\")</scr\0ipt>)
output = "&lt;scr\357\277\275ipt&gt;alert(\"XSS\")&lt;/scr\357\277\275ipt&gt;"
rexmloutput = "Ill-formed XHTML!"
check_sanitization(input, output, output, rexmloutput)
end
def test_should_sanitize_invalid_script_tag
input = %(<script/XSS SRC="http://ha.ckers.org/xss.js"></script>)
output = "&lt;script XSS=\"\" SRC=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;"
rexmloutput = "Ill-formed XHTML!"
check_sanitization(input, output, output, rexmloutput)
end
def test_should_sanitize_script_tag_with_multiple_open_brackets
input = %(<<script>alert("XSS");//<</script>)
output = "&lt;&lt;script&gt;alert(\"XSS\");//&lt;&lt;/script&gt;"
rexmloutput = "Ill-formed XHTML!"
check_sanitization(input, output, output, rexmloutput)
input = %(<iframe src=http://ha.ckers.org/scriptlet.html\n<)
output = %(&lt;iframe src=\"http://ha.ckers.org/scriptlet.html\"&gt;&lt;)
rexmloutput = "Ill-formed XHTML!"
check_sanitization(input, output, output, rexmloutput)
end
def test_should_sanitize_unclosed_script
input = %(<script src=http://ha.ckers.org/xss.js?<b>)
output = "&lt;script src=\"http://ha.ckers.org/xss.js?\"&gt;<b/>"
rexmloutput = "Ill-formed XHTML!"
check_sanitization(input, output, output, rexmloutput)
end
def test_should_sanitize_half_open_scripts
input = %(<img src="javascript:alert('XSS')")
output = "<img/>"
rexmloutput = "Ill-formed XHTML!"
check_sanitization(input, output, output, rexmloutput)
end
def test_should_not_fall_for_ridiculous_hack
img_hack = %(<img\nsrc\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n />)
output = "<img/>"
rexmloutput = "<img />"
check_sanitization(img_hack, output, output, rexmloutput)
end
def test_platypus
input = %(<a href="http://www.ragingplatypus.com/" style="display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;">never trust your upstream platypus</a>)
output = %(<a href='http://www.ragingplatypus.com/' style='display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;'>never trust your upstream platypus</a>)
check_sanitization(input, output, output, output)
end
def test_xul
input = %(<p style="-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')">fubar</p>)
output = %(<p style=''>fubar</p>)
check_sanitization(input, output, output, output)
end
def test_input_image
input = %(<input type="image" src="javascript:alert('XSS');" />)
output = %(<input type='image'/>)
rexmloutput = %(<input type='image' />)
check_sanitization(input, output, output, rexmloutput)
end
def test_non_alpha_non_digit
input = %(<script/XSS src="http://ha.ckers.org/xss.js"></script>)
output = "&lt;script XSS=\"\" src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;"
rexmloutput = "Ill-formed XHTML!"
check_sanitization(input, output, output, rexmloutput)
input = '<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>'
output = "<a>foo</a>"
rexmloutput = "Ill-formed XHTML!"
check_sanitization(input, output, output, rexmloutput)
input = '<img/src="http://ha.ckers.org/xss.js"/>'
output = "<img src='http://ha.ckers.org/xss.js'/>"
rexmloutput = "Ill-formed XHTML!"
check_sanitization(input, output, output, rexmloutput)
end
def test_img_dynsrc_lowsrc
input = %(<img dynsrc="javascript:alert('XSS')" />)
output = "<img/>"
rexmloutput = "<img />"
check_sanitization(input, output, output, rexmloutput)
end
def test_div_background_image_unicode_encoded
input = %(<div style="background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029">foo</div>)
output = "<div style=''>foo</div>"
check_sanitization(input, output, output, output)
end
def test_div_expression
input = %(<div style="width: expression(alert('XSS'));">foo</div>)
output = "<div style=''>foo</div>"
check_sanitization(input, output, output, output)
end
def test_img_vbscript
input = %(<img src='vbscript:msgbox("XSS")' />)
output = '<img/>'
rexmloutput = '<img />'
check_sanitization(input, output, output, rexmloutput)
end
def test_should_handle_astral_plane_characters
input = "<p>&#x1d4b5; &#x1d538;</p>"
output = "<p>\360\235\222\265 \360\235\224\270</p>"
@ -308,67 +123,6 @@ class SanitizeTest < Test::Unit::TestCase
check_sanitization(input, output, output, output)
end
def test_should_handle_malformed_image_tags
input = %(<img """><script>alert("XSS")</script>">)
output = "<img/>&lt;script&gt;alert(\"XSS\")&lt;/script&gt;\"&gt;"
rexmloutput = "Ill-formed XHTML!"
check_sanitization(input, output, output, rexmloutput)
end
def test_non_alpha_non_digit_II
input = %(<a href!#\$%&()*~+-_.,:;?@[/|\]^`=alert('XSS')>foo</a>)
output = "<a>foo</a>"
rexmloutput = "Ill-formed XHTML!"
check_sanitization(input, output, output, rexmloutput)
end
def test_non_alpha_non_digit_III
input = %(<a/href="javascript:alert('XSS');">foo</a>)
output = "<a>foo</a>"
rexmloutput = "Ill-formed XHTML!"
check_sanitization(input, output, output, rexmloutput)
end
def test_no_closing_script_tags
input = %(<script src=http://ha.ckers.org/xss.js?<b>)
output = "&lt;script src=\"http://ha.ckers.org/xss.js?\"&gt;<b/>"
rexmloutput = "Ill-formed XHTML!"
check_sanitization(input, output, output, rexmloutput)
end
def test_protocol_resolution_in_script_tag
input = %(<script src=//ha.ckers.org/.j></script>)
output = "&lt;script src=\"//ha.ckers.org/.j\"&gt;&lt;/script&gt;"
rexmloutput = "Ill-formed XHTML!"
check_sanitization(input, output, output, rexmloutput)
end
def test_double_open_angle_brackets
input = %(<img src=http://ha.ckers.org/scriptlet.html <)
output = "<img src='http://ha.ckers.org/scriptlet.html'/>&lt;"
rexmloutput = "Ill-formed XHTML!"
check_sanitization(input, output, output, rexmloutput)
input = %(<script src=http://ha.ckers.org/scriptlet.html <)
output = "&lt;script src=\"http://ha.ckers.org/scriptlet.html\"&gt;&lt;"
rexmloutput = "Ill-formed XHTML!"
check_sanitization(input, output, output, rexmloutput)
end
def test_background_attribute
input = %(<div background="javascript:alert('XSS')"></div>)
output = "<div/>"
xhtmloutput = "<div></div>"
check_sanitization(input, output, xhtmloutput, xhtmloutput)
end
def test_bgsound
input = %(<bgsound src="javascript:alert('XSS');" />)
output = "&lt;bgsound src=\"javascript:alert('XSS');\"/&gt;"
rexmloutput = "&lt;bgsound src=\"javascript:alert('XSS');\"&gt;&lt;/bgsound&gt;"
check_sanitization(input, output, output, rexmloutput)
end
# This affects only NS4. Is it worth fixing?
# def test_javascript_includes
# input = %(<div size="&{alert('XSS')}">foo</div>)
@ -376,45 +130,16 @@ class SanitizeTest < Test::Unit::TestCase
# check_sanitization(input, output, output, output)
# end
def test_link_stylesheets
input =%(<link rel="stylesheet" href="javascript:alert('XSS');" />)
output = "&lt;link rel=\"stylesheet\" href=\"javascript:alert('XSS');\"/&gt;"
rexmloutput = "&lt;link href=\"javascript:alert('XSS');\" rel=\"stylesheet\"/&gt;"
check_sanitization(input, output, output, rexmloutput)
input =%(<link rel="stylesheet" href="http://ha.ckers.org/xss.css" />)
output = "&lt;link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\"/&gt;"
rexmloutput = "&lt;link href=\"http://ha.ckers.org/xss.css\" rel=\"stylesheet\"/&gt;"
check_sanitization(input, output, output, rexmloutput)
end
def test_list_style_image
input = %(<li style="list-style-image: url\(javascript:alert\('XSS'\)\)">foo</li>)
output = "<li style=''>foo</li>"
check_sanitization(input, output, output, output)
end
def test_IE_Comments
input = %(<!--[if gte IE 4]><script>alert\('XSS'\);</script><![endif]-->)
output = ""
check_sanitization(input, output, output, output)
input = %(<![if !IE 5]><script>alert\('XSS'\);</script><![endif]>)
output = "&lt;script&gt;alert('XSS');&lt;/script&gt;"
rexmloutput = "Ill-formed XHTML!"
check_sanitization(input, output, output, rexmloutput)
end
def test_xml_base
input = %(<div xml:base="javascript:alert('XSS');//">foo</div>)
output = "<div>foo</div>"
check_sanitization(input, output, output, output)
end
def test_grave_accents
input =%(<img src=`javascript:alert('XSS')` />)
output = "<img/>"
rexmloutput = "Ill-formed XHTML!"
check_sanitization(input, output, output, rexmloutput)
html5lib_test_files('sanitizer').each do |filename|
JSON::parse(open(filename).read).each do |test|
define_method "test_#{test['name']}" do
check_sanitization(
test['input'],
test['output'],
test['xhtml'] || test['output'],
test['rexml'] || test['output']
)
end
end
end
end