More Tests

Enable unit tests for the HTML5lib Sanitizer (used in the <nowiki>
environment).
This commit is contained in:
Jacques Distler 2009-01-05 22:13:09 -06:00
parent 52c1f74ecc
commit 94476d9865
2 changed files with 156 additions and 5 deletions

View file

@ -1,9 +1,9 @@
[ [
{ {
"name": "IE_Comments", "name": "IE_Comments",
"input": "<!--[if gte IE 4]><script>alert('XSS');</script><![endif]-->", "input": "<!--[if gte IE 4]><script>alert('XSS');</script><![endif]-->a",
"output": "", "output": "a",
"xhtml": "&lt;!--[if gte IE 4]&gt;&lt;script&gt;alert(&#39;XSS&#39;);&lt;/script&gt;&lt;![endif]--&gt;" "xhtml": "&lt;!--[if gte IE 4]&gt;&lt;script&gt;alert(&#39;XSS&#39;);&lt;/script&gt;&lt;![endif]--&gt;a"
}, },
{ {
@ -211,7 +211,8 @@
{ {
"name": "should_handle_blank_text", "name": "should_handle_blank_text",
"input": "", "input": "",
"output": "" "output": "<div xmlns='http://www.w3.org/1999/xhtml'/>",
"xhtml": ""
}, },
{ {
@ -503,7 +504,8 @@
{ {
"name": "attributes_with_embedded_quotes", "name": "attributes_with_embedded_quotes",
"input": "<img src=doesntexist.jpg\"'onerror=\"alert(1) />", "input": "<img src=doesntexist.jpg\"'onerror=\"alert(1) />",
"output": "<img src='doesntexist.jpg&quot;&#39;onerror=&quot;alert(1)'/>", "output": "<img src='doesntexist.jpg&quot;&apos;onerror=&quot;alert(1)'/>",
"xhtml": "<img src='doesntexist.jpg&quot;&#39;onerror=&quot;alert(1)'/>",
"rexml": "Ill-formed XHTML!" "rexml": "Ill-formed XHTML!"
}, },

View file

@ -2,6 +2,8 @@
require File.expand_path(File.dirname(__FILE__) + '/../test_helper') require File.expand_path(File.dirname(__FILE__) + '/../test_helper')
require 'sanitize' require 'sanitize'
require 'json'
class SanitizeTest < Test::Unit::TestCase class SanitizeTest < Test::Unit::TestCase
@ -11,6 +13,14 @@ class SanitizeTest < Test::Unit::TestCase
end end
def do_sanitize_xhtml stream
safe_sanitize_xhtml(stream)
end
def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
assert_equal htmloutput, do_sanitize_xhtml(input)
end
def rexml_doc(string) def rexml_doc(string)
REXML::Document.new( REXML::Document.new(
"<div xmlns='http://www.w3.org/1999/xhtml'>#{string}</div>") "<div xmlns='http://www.w3.org/1999/xhtml'>#{string}</div>")
@ -30,5 +40,144 @@ class SanitizeTest < Test::Unit::TestCase
assert_equal(output2, input.to_utf8) assert_equal(output2, input.to_utf8)
end end
Sanitizer::ALLOWED_ELEMENTS.each do |tag_name|
define_method "test_should_allow_#{tag_name}_tag" do
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
htmloutput = "<#{tag_name.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>"
xhtmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name}>"
rexmloutput = xhtmloutput
if %w[caption colgroup optgroup option tbody td tfoot th thead tr].include?(tag_name)
htmloutput = "foo &lt;bad&gt;bar&lt;/bad&gt; baz"
xhtmloutput = htmloutput
elsif tag_name == 'col'
htmloutput = "foo &lt;bad&gt;bar&lt;/bad&gt; baz"
xhtmloutput = htmloutput
rexmloutput = "<col title='1' />"
elsif tag_name == 'table'
htmloutput = "foo &lt;bad&gt;bar&lt;/bad&gt;baz<table title='1'> </table>"
xhtmloutput = htmloutput
elsif tag_name == 'image'
htmloutput = "<img title='1'/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
xhtmloutput = htmloutput
rexmloutput = "<image title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</image>"
elsif VOID_ELEMENTS.include?(tag_name)
htmloutput = "<#{tag_name} title='1'/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
xhtmloutput = htmloutput
htmloutput += '<br/>' if tag_name == 'br'
rexmloutput = "<#{tag_name} title='1' />"
end
check_sanitization(input, xhtmloutput, xhtmloutput, rexmloutput)
end
end
Sanitizer::ALLOWED_ELEMENTS.each do |tag_name|
define_method "test_should_forbid_#{tag_name.upcase}_tag" do
input = "<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>"
output = "&lt;#{tag_name.upcase} title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/#{tag_name.upcase}&gt;"
xhtmloutput = "&lt;#{tag_name.upcase} title='1'&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/#{tag_name.upcase}&gt;"
check_sanitization(input, output, xhtmloutput, output)
end
end
Sanitizer::ALLOWED_ATTRIBUTES.each do |attribute_name|
next if attribute_name == 'style' || attribute_name.include?(':')
define_method "test_should_allow_#{attribute_name}_attribute" do
input = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
output = "<p #{attribute_name}='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
htmloutput = "<p #{attribute_name.downcase}='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
check_sanitization(input, output, output, output)
end
end
Sanitizer::ALLOWED_ATTRIBUTES.each do |attribute_name|
define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
input = "<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>"
output = "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
check_sanitization(input, output, output, output)
end
end
Sanitizer::ALLOWED_PROTOCOLS.each do |protocol|
define_method "test_should_allow_#{protocol}_uris" do
input = %(<a href="#{protocol}">foo</a>)
output = "<a href='#{protocol}'>foo</a>"
check_sanitization(input, output, output, output)
end
end
Sanitizer::ALLOWED_PROTOCOLS.each do |protocol|
define_method "test_should_allow_uppercase_#{protocol}_uris" do
input = %(<a href="#{protocol.upcase}">foo</a>)
output = "<a href='#{protocol.upcase}'>foo</a>"
check_sanitization(input, output, output, output)
end
end
Sanitizer::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
next unless Sanitizer::ALLOWED_ELEMENTS.include?(tag_name)
define_method "test_#{tag_name}_should_allow_local_href_with_ns_decl" do
input = %(<#{tag_name} xlink:href="#foo" xmlns:xlink='http://www.w3.org/1999/xlink'/>)
output = "<#{tag_name.downcase} xlink:href='#foo' xmlns:xlink='http://www.w3.org/1999/xlink'/>"
xhtmloutput = "<#{tag_name} xlink:href='#foo' xmlns:xlink='http://www.w3.org/1999/xlink'/>"
check_sanitization(input, xhtmloutput, xhtmloutput, xhtmloutput)
end
define_method "test_#{tag_name}_should_allow_local_href_with_newline_and_ns_decl" do
input = %(<#{tag_name} xlink:href="\n#foo" xmlns:xlink='http://www.w3.org/1999/xlink'/>)
output = "<#{tag_name.downcase} xlink:href='\n#foo' xmlns:xlink='http://www.w3.org/1999/xlink'/>"
xhtmloutput = "<#{tag_name} xlink:href='\n#foo' xmlns:xlink='http://www.w3.org/1999/xlink'/>"
check_sanitization(input, xhtmloutput, xhtmloutput, xhtmloutput)
end
define_method "test_#{tag_name}_should_forbid_local_href_without_ns_decl" do
input = %(<#{tag_name} xlink:href="#foo"/>)
output = "&lt;#{tag_name.downcase} xlink:href='#foo'/>"
xhtmloutput = "&lt;#{tag_name} xlink:href=&#39;#foo&#39;&gt;&lt;/#{tag_name}&gt;"
check_sanitization(input, xhtmloutput, xhtmloutput, xhtmloutput)
end
define_method "test_#{tag_name}_should_forbid_local_href_with_newline_without_ns_decl" do
input = %(<#{tag_name} xlink:href="\n#foo"/>)
output = "&lt;#{tag_name.downcase} xlink:href='\n#foo'/>"
xhtmloutput = "&lt;#{tag_name} xlink:href=&#39;\n#foo&#39;&gt;&lt;/#{tag_name}&gt;"
check_sanitization(input, xhtmloutput, xhtmloutput, xhtmloutput)
end
define_method "test_#{tag_name}_should_forbid_nonlocal_href_with_ns_decl" do
input = %(<#{tag_name} xlink:href="http://bad.com/foo" xmlns:xlink='http://www.w3.org/1999/xlink'/>)
output = "<#{tag_name.downcase} xmlns:xlink='http://www.w3.org/1999/xlink'/>"
xhtmloutput = "<#{tag_name} xmlns:xlink='http://www.w3.org/1999/xlink'/>"
check_sanitization(input, xhtmloutput, xhtmloutput, xhtmloutput)
end
define_method "test_#{tag_name}_should_forbid_nonlocal_href_with_newline_and_ns_decl" do
input = %(<#{tag_name} xlink:href="\nhttp://bad.com/foo" xmlns:xlink='http://www.w3.org/1999/xlink'/>)
output = "<#{tag_name.downcase} xmlns:xlink='http://www.w3.org/1999/xlink'/>"
xhtmloutput = "<#{tag_name} xmlns:xlink='http://www.w3.org/1999/xlink'/>"
check_sanitization(input, xhtmloutput, xhtmloutput, xhtmloutput)
end
end
def test_should_handle_astral_plane_characters
input = "<p>&#x1d4b5; &#x1d538;</p>"
output = "<p>\360\235\222\265 \360\235\224\270</p>"
check_sanitization(input, output, output, output)
input = "<p><tspan>\360\235\224\270</tspan> a</p>"
output = "<p><tspan>\360\235\224\270</tspan> a</p>"
check_sanitization(input, output, output, output)
end
JSON::parse(open(File.expand_path(File.join(File.dirname(__FILE__), '/../sanitizer.dat'))).read).each do |test|
define_method "test_#{test['name']}" do
check_sanitization(
test['input'],
test['output'],
test['xhtml'] || test['output'],
test['rexml'] || test['output']
)
end
end
end end