#!/usr/bin/env ruby #coding: ascii-8bit require File.expand_path(File.join(File.dirname(__FILE__), '/../test_helper')) require 'sanitizer' require 'json' require 'stringsupport' class SanitizerTest < Test::Unit::TestCase include Sanitizer def setup end def do_sanitize_xhtml stream xhtml_sanitize(stream) end def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput) assert_equal xhtmloutput.as_bytes, do_sanitize_xhtml(input).as_bytes end def test_sanitize_named_entities input = '
Greek &phis; φ, double-struck 𝔸, numeric 𝔸 ⁗, uppercase ™ <
' output = "Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227, uppercase \342\204\242 <
" output2 = "Greek \317\225 \317\206, double-struck \360\235\224\270, numeric 𝔸 ⁗, uppercase \342\204\242 <
" check_sanitization(input, output, output, output) assert_equal(output2, input.to_utf8.as_bytes) end def test_sanitize_malformed_utf8 input = "\357elephant & \302ivory
".purify output = "".respond_to?(:force_encoding) ? "elephant & ivory
" : "ephant & vory
" check_sanitization(input, output, output, output) end Sanitizer::ALLOWED_ELEMENTS.each do |tag_name| define_method "test_should_allow_#{tag_name}_tag" do input = "<#{tag_name} title='1'>foofoo
foo <bad>bar</bad> baz
" htmloutput = "foo <bad>bar</bad> baz
" check_sanitization(input, htmloutput, output, output) end end Sanitizer::ALLOWED_ATTRIBUTES.each do |attribute_name| define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do input = "foo
foo <bad>bar</bad> baz
" check_sanitization(input, output, output, output) end end Sanitizer::ALLOWED_PROTOCOLS.each do |protocol| define_method "test_should_allow_#{protocol}_uris" do input = %(foo) output = "foo" check_sanitization(input, output, output, output) end end Sanitizer::ALLOWED_PROTOCOLS.each do |protocol| define_method "test_should_allow_uppercase_#{protocol}_uris" do input = %(foo) output = "foo" check_sanitization(input, output, output, output) end end Sanitizer::SVG_ALLOW_LOCAL_HREF.each do |tag_name| next unless Sanitizer::ALLOWED_ELEMENTS.include?(tag_name) define_method "test_#{tag_name}_should_allow_local_href" do input = %(<#{tag_name} xlink:href="#foo"/>) output = "<#{tag_name.downcase} xlink:href='#foo'/>" xhtmloutput = "<#{tag_name} xlink:href='#foo'/>" check_sanitization(input, output, xhtmloutput, xhtmloutput) end define_method "test_#{tag_name}_should_allow_local_href_with_newline" do input = %(<#{tag_name} xlink:href="\n#foo"/>) output = "<#{tag_name.downcase} xlink:href='\n#foo'/>" xhtmloutput = "<#{tag_name} xlink:href='\n#foo'/>" check_sanitization(input, output, xhtmloutput, xhtmloutput) end define_method "test_#{tag_name}_should_forbid_nonlocal_href" do input = %(<#{tag_name} xlink:href="http://bad.com/foo"/>) output = "<#{tag_name.downcase}/>" xhtmloutput = "<#{tag_name}/>" check_sanitization(input, output, xhtmloutput, xhtmloutput) end define_method "test_#{tag_name}_should_forbid_nonlocal_href_with_newline" do input = %(<#{tag_name} xlink:href="\nhttp://bad.com/foo"/>) output = "<#{tag_name.downcase}/>" xhtmloutput = "<#{tag_name}/>" check_sanitization(input, output, xhtmloutput, xhtmloutput) end end def test_should_handle_astral_plane_characters input = "𝒵 𝔸
" output = "\360\235\222\265 \360\235\224\270
" check_sanitization(input, output, output, output) input = "