From 94476d9865b72dda047e14057188f3339e3613a2 Mon Sep 17 00:00:00 2001 From: Jacques Distler Date: Mon, 5 Jan 2009 22:13:09 -0600 Subject: [PATCH] More Tests Enable unit tests for the HTML5lib Sanitizer (used in the environment). --- test/sanitizer.dat | 12 +-- test/unit/sanitize_test.rb | 149 +++++++++++++++++++++++++++++++++++++ 2 files changed, 156 insertions(+), 5 deletions(-) diff --git a/test/sanitizer.dat b/test/sanitizer.dat index 064fa5cc..f7cdf51e 100644 --- a/test/sanitizer.dat +++ b/test/sanitizer.dat @@ -1,9 +1,9 @@ [ { "name": "IE_Comments", - "input": "", - "output": "", - "xhtml": "<!--[if gte IE 4]><script>alert('XSS');</script><![endif]-->" + "input": "a", + "output": "a", + "xhtml": "<!--[if gte IE 4]><script>alert('XSS');</script><![endif]-->a" }, { @@ -211,7 +211,8 @@ { "name": "should_handle_blank_text", "input": "", - "output": "" + "output": "
", + "xhtml": "" }, { @@ -503,7 +504,8 @@ { "name": "attributes_with_embedded_quotes", "input": "", - "output": "", + "output": "", + "xhtml": "", "rexml": "Ill-formed XHTML!" }, diff --git a/test/unit/sanitize_test.rb b/test/unit/sanitize_test.rb index 37260516..fb897b6c 100644 --- a/test/unit/sanitize_test.rb +++ b/test/unit/sanitize_test.rb @@ -2,6 +2,8 @@ require File.expand_path(File.dirname(__FILE__) + '/../test_helper') require 'sanitize' +require 'json' + class SanitizeTest < Test::Unit::TestCase @@ -11,6 +13,14 @@ class SanitizeTest < Test::Unit::TestCase end + def do_sanitize_xhtml stream + safe_sanitize_xhtml(stream) + end + + def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput) + assert_equal htmloutput, do_sanitize_xhtml(input) + end + def rexml_doc(string) REXML::Document.new( "
#{string}
") @@ -30,5 +40,144 @@ class SanitizeTest < Test::Unit::TestCase assert_equal(output2, input.to_utf8) end + Sanitizer::ALLOWED_ELEMENTS.each do |tag_name| + define_method "test_should_allow_#{tag_name}_tag" do + input = "<#{tag_name} title='1'>foo bar baz" + htmloutput = "<#{tag_name.downcase} title='1'>foo <bad>bar</bad> baz" + xhtmloutput = "<#{tag_name} title='1'>foo <bad>bar</bad> baz" + rexmloutput = xhtmloutput + + if %w[caption colgroup optgroup option tbody td tfoot th thead tr].include?(tag_name) + htmloutput = "foo <bad>bar</bad> baz" + xhtmloutput = htmloutput + elsif tag_name == 'col' + htmloutput = "foo <bad>bar</bad> baz" + xhtmloutput = htmloutput + rexmloutput = "" + elsif tag_name == 'table' + htmloutput = "foo <bad>bar</bad>baz
" + xhtmloutput = htmloutput + elsif tag_name == 'image' + htmloutput = "foo <bad>bar</bad> baz" + xhtmloutput = htmloutput + rexmloutput = "foo <bad>bar</bad> baz" + elsif VOID_ELEMENTS.include?(tag_name) + htmloutput = "<#{tag_name} title='1'/>foo <bad>bar</bad> baz" + xhtmloutput = htmloutput + htmloutput += '
' if tag_name == 'br' + rexmloutput = "<#{tag_name} title='1' />" + end + check_sanitization(input, xhtmloutput, xhtmloutput, rexmloutput) + end + end + + Sanitizer::ALLOWED_ELEMENTS.each do |tag_name| + define_method "test_should_forbid_#{tag_name.upcase}_tag" do + input = "<#{tag_name.upcase} title='1'>foo bar baz" + output = "<#{tag_name.upcase} title=\"1\">foo <bad>bar</bad> baz</#{tag_name.upcase}>" + xhtmloutput = "<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>" + check_sanitization(input, output, xhtmloutput, output) + end + end + + Sanitizer::ALLOWED_ATTRIBUTES.each do |attribute_name| + next if attribute_name == 'style' || attribute_name.include?(':') + define_method "test_should_allow_#{attribute_name}_attribute" do + input = "

foo bar baz

" + output = "

foo <bad>bar</bad> baz

" + htmloutput = "

foo <bad>bar</bad> baz

" + check_sanitization(input, output, output, output) + end + end + + Sanitizer::ALLOWED_ATTRIBUTES.each do |attribute_name| + define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do + input = "

foo bar baz

" + output = "

foo <bad>bar</bad> baz

" + check_sanitization(input, output, output, output) + end + end + + Sanitizer::ALLOWED_PROTOCOLS.each do |protocol| + define_method "test_should_allow_#{protocol}_uris" do + input = %(foo) + output = "foo" + check_sanitization(input, output, output, output) + end + end + + Sanitizer::ALLOWED_PROTOCOLS.each do |protocol| + define_method "test_should_allow_uppercase_#{protocol}_uris" do + input = %(foo) + output = "foo" + check_sanitization(input, output, output, output) + end + end + + Sanitizer::SVG_ALLOW_LOCAL_HREF.each do |tag_name| + next unless Sanitizer::ALLOWED_ELEMENTS.include?(tag_name) + define_method "test_#{tag_name}_should_allow_local_href_with_ns_decl" do + input = %(<#{tag_name} xlink:href="#foo" xmlns:xlink='http://www.w3.org/1999/xlink'/>) + output = "<#{tag_name.downcase} xlink:href='#foo' xmlns:xlink='http://www.w3.org/1999/xlink'/>" + xhtmloutput = "<#{tag_name} xlink:href='#foo' xmlns:xlink='http://www.w3.org/1999/xlink'/>" + check_sanitization(input, xhtmloutput, xhtmloutput, xhtmloutput) + end + + define_method "test_#{tag_name}_should_allow_local_href_with_newline_and_ns_decl" do + input = %(<#{tag_name} xlink:href="\n#foo" xmlns:xlink='http://www.w3.org/1999/xlink'/>) + output = "<#{tag_name.downcase} xlink:href='\n#foo' xmlns:xlink='http://www.w3.org/1999/xlink'/>" + xhtmloutput = "<#{tag_name} xlink:href='\n#foo' xmlns:xlink='http://www.w3.org/1999/xlink'/>" + check_sanitization(input, xhtmloutput, xhtmloutput, xhtmloutput) + end + + define_method "test_#{tag_name}_should_forbid_local_href_without_ns_decl" do + input = %(<#{tag_name} xlink:href="#foo"/>) + output = "<#{tag_name.downcase} xlink:href='#foo'/>" + xhtmloutput = "<#{tag_name} xlink:href='#foo'></#{tag_name}>" + check_sanitization(input, xhtmloutput, xhtmloutput, xhtmloutput) + end + + define_method "test_#{tag_name}_should_forbid_local_href_with_newline_without_ns_decl" do + input = %(<#{tag_name} xlink:href="\n#foo"/>) + output = "<#{tag_name.downcase} xlink:href='\n#foo'/>" + xhtmloutput = "<#{tag_name} xlink:href='\n#foo'></#{tag_name}>" + check_sanitization(input, xhtmloutput, xhtmloutput, xhtmloutput) + end + + define_method "test_#{tag_name}_should_forbid_nonlocal_href_with_ns_decl" do + input = %(<#{tag_name} xlink:href="http://bad.com/foo" xmlns:xlink='http://www.w3.org/1999/xlink'/>) + output = "<#{tag_name.downcase} xmlns:xlink='http://www.w3.org/1999/xlink'/>" + xhtmloutput = "<#{tag_name} xmlns:xlink='http://www.w3.org/1999/xlink'/>" + check_sanitization(input, xhtmloutput, xhtmloutput, xhtmloutput) + end + + define_method "test_#{tag_name}_should_forbid_nonlocal_href_with_newline_and_ns_decl" do + input = %(<#{tag_name} xlink:href="\nhttp://bad.com/foo" xmlns:xlink='http://www.w3.org/1999/xlink'/>) + output = "<#{tag_name.downcase} xmlns:xlink='http://www.w3.org/1999/xlink'/>" + xhtmloutput = "<#{tag_name} xmlns:xlink='http://www.w3.org/1999/xlink'/>" + check_sanitization(input, xhtmloutput, xhtmloutput, xhtmloutput) + end + end + + def test_should_handle_astral_plane_characters + input = "

𝒵 𝔸

" + output = "

\360\235\222\265 \360\235\224\270

" + check_sanitization(input, output, output, output) + + input = "

\360\235\224\270 a

" + output = "

\360\235\224\270 a

" + check_sanitization(input, output, output, output) + end + + JSON::parse(open(File.expand_path(File.join(File.dirname(__FILE__), '/../sanitizer.dat'))).read).each do |test| + define_method "test_#{test['name']}" do + check_sanitization( + test['input'], + test['output'], + test['xhtml'] || test['output'], + test['rexml'] || test['output'] + ) + end + end end