#!/usr/bin/env ruby

require File.expand_path(File.dirname(__FILE__) + '/../test_helper')
require 'sanitize'
require 'json'

# Exercises the sanitizer entry points mixed in from Sanitize
# (safe_sanitize_xhtml, sanitize_xhtml, sanitize_html, sanitize_rexml).
#
# Most tests are generated with define_method from the Sanitizer constants
# (ALLOWED_ELEMENTS, ALLOWED_ATTRIBUTES, ALLOWED_PROTOCOLS,
# SVG_ALLOW_LOCAL_HREF) and from the JSON fixtures in ../sanitizer.dat.
#
# NOTE(review): several fixture strings below (inputs that are just "foo",
# bodies wrapped only in bare newlines, the wrapper regexp in my_rex) look as
# if markup was stripped from them at some point -- TODO confirm against the
# canonical copy of this file before trusting these expectations.
class SanitizeTest < Test::Unit::TestCase
  include Sanitize

  # No per-test state is required.
  def setup
  end

  # Runs +stream+ through safe_sanitize_xhtml and returns its result.
  def do_sanitize_xhtml(stream)
    safe_sanitize_xhtml(stream)
  end

  # Asserts that sanitizing +input+ yields +htmloutput+.
  #
  # +xhtmloutput+ and +rexmloutput+ are accepted so call sites can record the
  # per-backend expectations, but only +htmloutput+ is actually compared.
  def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
    assert_equal htmloutput, do_sanitize_xhtml(input)
  end

  # Wraps +string+ and parses it into a REXML::Document.
  def rexml_doc(string)
    REXML::Document.new("\n#{string}\n")
  end

  # Sanitizes +string+ through sanitize_rexml and strips the wrapper that the
  # regexp below matches, returning only the captured inner content.
  def my_rex(string)
    sanitized = sanitize_rexml(rexml_doc(string.to_utf8))
    sanitized.gsub(/\A\n(.*)<\/div>\Z/m, '\1')
  end

  def test_sanitize_named_entities
    input = "\n\nGreek &phis; φ, double-struck 𝔸, numeric 𝔸 ⁗, uppercase ™ <\n\n"
    output = "\n\nGreek \317\225 \317\206, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227, uppercase \342\204\242 <\n\n"
    output2 = "\n\nGreek \317\225 \317\206, double-struck \360\235\224\270, numeric 𝔸 ⁗, uppercase \342\204\242 <\n\n"

    assert_equal(output, sanitize_xhtml(input))
    assert_equal(output, sanitize_html(input))
    assert_equal(output, my_rex(input))
    assert_equal(output2, input.to_utf8)
  end

  def test_sanitize_malformed_utf8
    # \357 and \302 are lone lead bytes; the expected output has each one
    # replaced by the bytes \357\277\275 (U+FFFD encoded as UTF-8).
    input = "\n\n\357elephant & \302ivory\n\n"
    output = "\n\n\357\277\275elephant & \357\277\275ivory\n\n"
    check_sanitization(input, output, output, output)
  end

  # One test per allowed element, using the element name exactly as listed.
  Sanitizer::ALLOWED_ELEMENTS.each do |tag_name|
    define_method "test_should_allow_#{tag_name}_tag" do
      input = "<#{tag_name} title='1'>foo bar baz"
      htmloutput = "<#{tag_name.downcase} title='1'>foo <bad>bar</bad> baz"
      xhtmloutput = "<#{tag_name} title='1'>foo <bad>bar</bad> baz"
      rexmloutput = xhtmloutput

      # Elements whose expected output differs from the default above.
      if %w[caption colgroup optgroup option tbody td tfoot th thead tr].include?(tag_name)
        htmloutput = "foo <bad>bar</bad> baz"
        xhtmloutput = htmloutput
      elsif tag_name == 'col'
        htmloutput = "foo <bad>bar</bad> baz"
        xhtmloutput = htmloutput
        rexmloutput = ""
      elsif tag_name == 'table'
        htmloutput = "foo <bad>bar</bad>baz\n"
        xhtmloutput = htmloutput
      elsif tag_name == 'image'
        htmloutput = "foo <bad>bar</bad> baz"
        xhtmloutput = htmloutput
        rexmloutput = "foo <bad>bar</bad> baz"
      elsif VOID_ELEMENTS.include?(tag_name)
        htmloutput = "<#{tag_name} title='1'/>foo <bad>bar</bad> baz"
        xhtmloutput = htmloutput
        # Only the HTML expectation gains the trailing newline for <br>;
        # xhtmloutput was captured before this and is what gets asserted.
        htmloutput += "\n" if tag_name == 'br'
        rexmloutput = "<#{tag_name} title='1' />"
      end

      check_sanitization(input, xhtmloutput, xhtmloutput, rexmloutput)
    end
  end

  # One test per allowed element, using the upper-cased element name.
  Sanitizer::ALLOWED_ELEMENTS.each do |tag_name|
    define_method "test_should_forbid_#{tag_name.upcase}_tag" do
      input = "<#{tag_name.upcase} title='1'>foo bar baz"
      output = "<#{tag_name.upcase} title=\"1\">foo <bad>bar</bad> baz</#{tag_name.upcase}>"
      xhtmloutput = "<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>"
      check_sanitization(input, output, xhtmloutput, output)
    end
  end

  # One test per allowed attribute; 'style' and namespaced (colon-containing)
  # attribute names are skipped here.
  Sanitizer::ALLOWED_ATTRIBUTES.each do |attribute_name|
    next if attribute_name == 'style' || attribute_name.include?(':')

    define_method "test_should_allow_#{attribute_name}_attribute" do
      input = "\n\nfoo bar baz\n\n"
      output = "\n\nfoo <bad>bar</bad> baz\n\n"
      check_sanitization(input, output, output, output)
    end
  end

  # One test per allowed attribute, named after the upper-cased attribute.
  Sanitizer::ALLOWED_ATTRIBUTES.each do |attribute_name|
    define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
      input = "\n\nfoo bar baz\n\n"
      output = "\n\nfoo <bad>bar</bad> baz\n\n"
      check_sanitization(input, output, output, output)
    end
  end

  Sanitizer::ALLOWED_PROTOCOLS.each do |protocol|
    define_method "test_should_allow_#{protocol}_uris" do
      input = %(foo)
      output = "foo"
      check_sanitization(input, output, output, output)
    end
  end

  Sanitizer::ALLOWED_PROTOCOLS.each do |protocol|
    define_method "test_should_allow_uppercase_#{protocol}_uris" do
      input = %(foo)
      output = "foo"
      check_sanitization(input, output, output, output)
    end
  end

  # xlink:href handling for the SVG elements that may carry a local href.
  # Elements that are not themselves allowed are skipped.
  Sanitizer::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
    next unless Sanitizer::ALLOWED_ELEMENTS.include?(tag_name)

    define_method "test_#{tag_name}_should_allow_local_href_with_ns_decl" do
      input = %(<#{tag_name} xlink:href="#foo" xmlns:xlink='http://www.w3.org/1999/xlink'/>)
      xhtmloutput = "<#{tag_name} xlink:href='#foo' xmlns:xlink='http://www.w3.org/1999/xlink'/>"
      check_sanitization(input, xhtmloutput, xhtmloutput, xhtmloutput)
    end

    define_method "test_#{tag_name}_should_allow_local_href_with_newline_and_ns_decl" do
      input = %(<#{tag_name} xlink:href="\n#foo" xmlns:xlink='http://www.w3.org/1999/xlink'/>)
      xhtmloutput = "<#{tag_name} xlink:href='\n#foo' xmlns:xlink='http://www.w3.org/1999/xlink'/>"
      check_sanitization(input, xhtmloutput, xhtmloutput, xhtmloutput)
    end

    define_method "test_#{tag_name}_should_forbid_local_href_without_ns_decl" do
      input = %(<#{tag_name} xlink:href="#foo"/>)
      xhtmloutput = "<#{tag_name} xlink:href='#foo'></#{tag_name}>"
      check_sanitization(input, xhtmloutput, xhtmloutput, xhtmloutput)
    end

    define_method "test_#{tag_name}_should_forbid_local_href_with_newline_without_ns_decl" do
      input = %(<#{tag_name} xlink:href="\n#foo"/>)
      xhtmloutput = "<#{tag_name} xlink:href='\n#foo'></#{tag_name}>"
      check_sanitization(input, xhtmloutput, xhtmloutput, xhtmloutput)
    end

    define_method "test_#{tag_name}_should_forbid_nonlocal_href_with_ns_decl" do
      input = %(<#{tag_name} xlink:href="http://bad.com/foo" xmlns:xlink='http://www.w3.org/1999/xlink'/>)
      xhtmloutput = "<#{tag_name} xmlns:xlink='http://www.w3.org/1999/xlink'/>"
      check_sanitization(input, xhtmloutput, xhtmloutput, xhtmloutput)
    end

    define_method "test_#{tag_name}_should_forbid_nonlocal_href_with_newline_and_ns_decl" do
      input = %(<#{tag_name} xlink:href="\nhttp://bad.com/foo" xmlns:xlink='http://www.w3.org/1999/xlink'/>)
      xhtmloutput = "<#{tag_name} xmlns:xlink='http://www.w3.org/1999/xlink'/>"
      check_sanitization(input, xhtmloutput, xhtmloutput, xhtmloutput)
    end
  end

  def test_should_handle_astral_plane_characters
    input = "\n\n𝒵 𝔸\n\n"
    output = "\n\n\360\235\222\265 \360\235\224\270\n\n"
    check_sanitization(input, output, output, output)

    input = "\n\n\360\235\224\270 a\n\n"
    output = "\n\n\360\235\224\270 a\n\n"
    check_sanitization(input, output, output, output)
  end

  # Data-driven tests: one per entry in sanitizer.dat. File.read opens, reads
  # and closes the file in a single call, so no handle is left open while the
  # class body is being evaluated.
  fixture_path = File.expand_path(File.join(File.dirname(__FILE__), '/../sanitizer.dat'))
  JSON.parse(File.read(fixture_path)).each do |fixture|
    define_method "test_#{fixture['name']}" do
      check_sanitization(
        fixture['input'],
        fixture['output'],
        fixture['xhtml'] || fixture['output'],
        fixture['rexml'] || fixture['output']
      )
    end
  end
end