2008-05-21 00:02:10 +02:00
|
|
|
#!/usr/bin/env ruby
|
2008-11-24 22:53:39 +01:00
|
|
|
#coding: ascii-8bit
|
2008-05-21 00:02:10 +02:00
|
|
|
|
|
|
|
require File.expand_path(File.join(File.dirname(__FILE__), '/../test_helper'))
|
|
|
|
require 'sanitizer'
|
|
|
|
require 'json'
|
|
|
|
require 'stringsupport'
|
|
|
|
|
|
|
|
# Tests for the XHTML sanitization offered by the Sanitizer mixin.
#
# A few cases are written by hand; the rest are generated while the class
# body is evaluated, one test method per entry of Sanitizer::ALLOWED_ELEMENTS,
# Sanitizer::ALLOWED_ATTRIBUTES, Sanitizer::ALLOWED_PROTOCOLS,
# Sanitizer::SVG_ALLOW_LOCAL_HREF, and per record of ../sanitizer.dat (JSON).
#
# NOTE(review): several string literals below (the bare "<" and the literal
# non-ASCII characters in test_sanitize_named_entities, "<bad>" inside the
# expected outputs, ...) look as though entity references were decoded
# somewhere upstream of this copy. They are reproduced verbatim here --
# confirm against the canonical file before relying on them.
class SanitizerTest < Test::Unit::TestCase
  include Sanitizer

  # Intentionally empty: no per-test fixture is needed.
  def setup
  end

  # Runs +stream+ through xhtml_sanitize (presumably supplied by the included
  # Sanitizer module) and returns the result.
  def do_sanitize_xhtml(stream)
    xhtml_sanitize(stream)
  end

  # Asserts that sanitizing +input+ yields +xhtmloutput+, comparing both
  # sides via String#as_bytes.
  #
  # +htmloutput+ and +rexmloutput+ are accepted so that all call sites (and
  # the sanitizer.dat records) keep a uniform shape, but they are NOT
  # checked: only the XHTML expectation is compared.
  def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
    assert_equal xhtmloutput.as_bytes, do_sanitize_xhtml(input).as_bytes
  end

  def test_sanitize_named_entities
    input = '<p>Greek &phis; φ, double-struck 𝔸, numeric 𝔸 ⁗, uppercase ™ <</p>'
    output = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric \360\235\224\270 \342\201\227, uppercase \342\204\242 <</p>"
    output2 = "<p>Greek \317\225 \317\206, double-struck \360\235\224\270, numeric 𝔸 ⁗, uppercase \342\204\242 <</p>"
    check_sanitization(input, output, output, output)
    # String#to_utf8 on its own is checked against a separate expectation.
    assert_equal(output2, input.to_utf8.as_bytes)
  end

  def test_sanitize_malformed_utf8
    input = "<p>\357elephant & \302ivory</p>".purify
    # The expected text differs depending on whether String responds to
    # force_encoding (i.e. on the Ruby version running the suite).
    output = "".respond_to?(:force_encoding) ? "<p>elephant & ivory</p>" : "<p>ephant & vory</p>"
    check_sanitization(input, output, output, output)
  end

  # One test per allowed element: the element itself survives, with special
  # expectations for members of VOID_ELEMENTS.
  Sanitizer::ALLOWED_ELEMENTS.each do |tag_name|
    define_method "test_should_allow_#{tag_name}_tag" do
      input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
      htmloutput = "<#{tag_name.downcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.downcase}>"
      xhtmloutput = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
      rexmloutput = xhtmloutput

      if VOID_ELEMENTS.include?(tag_name)
        htmloutput = "<#{tag_name} title='1'/>foo <bad>bar</bad> baz"
        # Captured before the 'br' suffix is appended to htmloutput below.
        xhtmloutput = htmloutput
        htmloutput += '<br/>' if tag_name == 'br'
        rexmloutput = "<#{tag_name} title='1' />"
      end
      check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
    end
  end

  # One test per allowed element, spelled in upper case.
  Sanitizer::ALLOWED_ELEMENTS.each do |tag_name|
    define_method "test_should_forbid_#{tag_name.upcase}_tag" do
      upper = tag_name.upcase
      input = "<#{upper} title='1'>foo <bad>bar</bad> baz</#{upper}>"
      output = "<#{upper} title=\"1\">foo <bad>bar</bad> baz</#{upper}>"
      xhtmloutput = "<#{upper} title='1'>foo <bad>bar</bad> baz</#{upper}>"
      check_sanitization(input, output, xhtmloutput, output)
    end
  end

  # One test per allowed attribute ('style' is skipped here).
  Sanitizer::ALLOWED_ATTRIBUTES.each do |attribute_name|
    next if attribute_name == 'style'
    define_method "test_should_allow_#{attribute_name}_attribute" do
      input = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
      output = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
      htmloutput = "<p #{attribute_name.downcase}='foo'>foo <bad>bar</bad> baz</p>"
      check_sanitization(input, htmloutput, output, output)
    end
  end

  # One test per allowed attribute, spelled in upper case: the attribute is
  # expected to be dropped.
  Sanitizer::ALLOWED_ATTRIBUTES.each do |attribute_name|
    define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
      input = "<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>"
      output = "<p>foo <bad>bar</bad> baz</p>"
      check_sanitization(input, output, output, output)
    end
  end

  # One test per allowed protocol, used verbatim as an href value.
  Sanitizer::ALLOWED_PROTOCOLS.each do |protocol|
    define_method "test_should_allow_#{protocol}_uris" do
      input = %(<a href="#{protocol}">foo</a>)
      output = "<a href='#{protocol}'>foo</a>"
      check_sanitization(input, output, output, output)
    end
  end

  # Same as above with the protocol upper-cased.
  Sanitizer::ALLOWED_PROTOCOLS.each do |protocol|
    define_method "test_should_allow_uppercase_#{protocol}_uris" do
      input = %(<a href="#{protocol.upcase}">foo</a>)
      output = "<a href='#{protocol.upcase}'>foo</a>"
      check_sanitization(input, output, output, output)
    end
  end

  # For each SVG element that may carry a local xlink:href (and is itself an
  # allowed element): local references are kept, non-local ones are removed,
  # with and without a leading newline in the attribute value.
  Sanitizer::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
    next unless Sanitizer::ALLOWED_ELEMENTS.include?(tag_name)

    define_method "test_#{tag_name}_should_allow_local_href" do
      input = %(<#{tag_name} xlink:href="#foo"/>)
      output = "<#{tag_name.downcase} xlink:href='#foo'/>"
      xhtmloutput = "<#{tag_name} xlink:href='#foo'/>"
      check_sanitization(input, output, xhtmloutput, xhtmloutput)
    end

    define_method "test_#{tag_name}_should_allow_local_href_with_newline" do
      input = %(<#{tag_name} xlink:href="\n#foo"/>)
      output = "<#{tag_name.downcase} xlink:href='\n#foo'/>"
      xhtmloutput = "<#{tag_name} xlink:href='\n#foo'/>"
      check_sanitization(input, output, xhtmloutput, xhtmloutput)
    end

    define_method "test_#{tag_name}_should_forbid_nonlocal_href" do
      input = %(<#{tag_name} xlink:href="http://bad.com/foo"/>)
      output = "<#{tag_name.downcase}/>"
      xhtmloutput = "<#{tag_name}/>"
      check_sanitization(input, output, xhtmloutput, xhtmloutput)
    end

    define_method "test_#{tag_name}_should_forbid_nonlocal_href_with_newline" do
      input = %(<#{tag_name} xlink:href="\nhttp://bad.com/foo"/>)
      output = "<#{tag_name.downcase}/>"
      xhtmloutput = "<#{tag_name}/>"
      check_sanitization(input, output, xhtmloutput, xhtmloutput)
    end
  end

  def test_should_handle_astral_plane_characters
    input = "<p>𝒵 𝔸</p>"
    output = "<p>\360\235\222\265 \360\235\224\270</p>"
    check_sanitization(input, output, output, output)

    input = "<p><tspan>\360\235\224\270</tspan> a</p>"
    output = "<p><tspan>\360\235\224\270</tspan> a</p>"
    check_sanitization(input, output, output, output)
  end

  # This affects only NS4. Is it worth fixing?
  #  def test_javascript_includes
  #    input = %(<div size="&{alert('XSS')}">foo</div>)
  #    output = "<div>foo</div>"
  #    check_sanitization(input, output, output, output)
  #  end

  # Data-driven cases: one test per record of ../sanitizer.dat. Each record
  # supplies 'name', 'input' and 'output', plus optional 'xhtml' / 'rexml'
  # overrides that fall back to 'output'.
  # File.read is used instead of open(...).read so the file handle is closed
  # as soon as the fixtures are loaded.
  fixtures_path = File.expand_path(File.join(File.dirname(__FILE__), '/../sanitizer.dat'))
  JSON.parse(File.read(fixtures_path)).each do |fixture|
    define_method "test_#{fixture['name']}" do
      check_sanitization(
        fixture['input'],
        fixture['output'],
        fixture['xhtml'] || fixture['output'],
        fixture['rexml'] || fixture['output']
      )
    end
  end
end
|