Sanitize URI schemes.

This commit is contained in:
Jacques Distler 2007-02-23 13:34:58 -06:00
parent 4c903d6a77
commit d8e06f6db9
2 changed files with 31 additions and 4 deletions

View file

@ -104,11 +104,17 @@ module Sanitize
'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
'stroke-opacity']
# URI schemes that are published below as ALLOWED_PROTOCOLS.
acceptable_protocols = %w(
  ed2k ftp http https irc
  mailto news gopher nntp telnet webcal
  xmpp callto feed urn aim rsync tag
  ssh sftp rtsp afs
)
# Publish the whitelists built above as constants. Every assignment is guarded
# by `unless defined?`, so a constant that already exists keeps its current
# value and is not reassigned.
# Element whitelist: general, MathML and SVG element names combined.
ALLOWED_ELEMENTS = acceptable_elements + mathml_elements + svg_elements unless defined?(ALLOWED_ELEMENTS)
# Attribute whitelist: general, MathML and SVG attribute names combined.
ALLOWED_ATTRIBUTES = acceptable_attributes + mathml_attributes + svg_attributes unless defined?(ALLOWED_ATTRIBUTES)
ALLOWED_CSS_PROPERTIES = acceptable_css_properties unless defined?(ALLOWED_CSS_PROPERTIES)
ALLOWED_CSS_KEYWORDS = acceptable_css_keywords unless defined?(ALLOWED_CSS_KEYWORDS)
ALLOWED_SVG_PROPERTIES = acceptable_svg_properties unless defined?(ALLOWED_SVG_PROPERTIES)
# URI schemes accepted in href/src attribute values.
ALLOWED_PROTOCOLS = acceptable_protocols unless defined?(ALLOWED_PROTOCOLS)
# Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and stripping out all
# attributes not in ALLOWED_ATTRIBUTES. Style attributes are parsed, and a restricted set,
@ -133,7 +139,9 @@ module Sanitize
node.attributes.delete_if { |attr,v| !ALLOWED_ATTRIBUTES.include?(attr) }
# Remove href/src attributes whose value names a URI scheme that is not in
# ALLOWED_PROTOCOLS. Values without a scheme (relative references) are kept.
%w(href src).each do |attr|
  # Decode HTML entities, then strip whitespace and control bytes, so that an
  # obfuscated scheme such as "java&#10;script:" is seen in its collapsed form.
  val_unescaped = CGI.unescapeHTML(node.attributes[attr].to_s).gsub(/[\000-\040\177-\240]+/,'')
  # A scheme may contain "+", "-" and "." after its first character (RFC 3986,
  # section 3.1), e.g. "view-source:". Matching only \w+ would let such values
  # slip past the whitelist. Schemes are case-insensitive, so the lookup is
  # done on the lower-cased name; this also covers "javascript:" in any case.
  if (val_unescaped =~ /\A(\w[\w+.\-]*):/) && !ALLOWED_PROTOCOLS.include?($1.downcase)
    node.attributes.delete attr
  end
end
if node.attributes['style']
node.attributes['style'] = sanitize_css(node.attributes['style'])

View file

@ -25,9 +25,11 @@ class SanitizeTest < Test::Unit::TestCase
end
Sanitize::ALLOWED_ATTRIBUTES.each do |attribute_name|
define_method "test_should_allow_#{attribute_name}_attribute" do
assert_equal "<p #{attribute_name}=\"display: none;\">foo &lt;bad>bar&lt;/bad> baz</p>",
sanitize_html("<p #{attribute_name}='display: none;'>foo <bad>bar</bad> baz</p>")
if attribute_name != 'style'
define_method "test_should_allow_#{attribute_name}_attribute" do
assert_equal "<p #{attribute_name}=\"foo\">foo &lt;bad>bar&lt;/bad> baz</p>",
sanitize_html("<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>")
end
end
end
@ -140,4 +142,21 @@ class SanitizeTest < Test::Unit::TestCase
sanitize_html('<img/src="http://ha.ckers.org/xss.js"/>')
end
# An <img> carrying a javascript: URL in either dynsrc or lowsrc is expected
# to come back from the sanitizer with that attribute gone.
def test_img_dynsrc_lowsrc
  %w(dynsrc lowsrc).each do |attr_name|
    markup = %(<img #{attr_name}="javascript:alert('XSS')" />)
    assert_equal "<img />", sanitize_html(markup)
  end
end
# A style value that hides url('javascript:...') behind CSS backslash escapes
# (\0075\0072\006C spells "url") is expected to be emptied by the sanitizer.
def test_div_background_image_unicode_encoded
  # %q keeps every backslash literal. With %( ), Ruby reads sequences such as
  # \007 and \006 as octal escapes and replaces them with control characters,
  # so the sanitizer would never see the CSS-escaped payload.
  # NOTE(review): the expected output assumes the CSS sanitizer rejects values
  # containing backslashes -- confirm against sanitize_css.
  payload = %q(<div style="background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029">foo</div>)
  assert_equal '<div style="">foo</div>', sanitize_html(payload)
end
# A style attribute using expression(...) is expected to come back empty.
def test_div_expression
  markup = %(<div style="width: expression(alert('XSS'));">foo</div>)
  assert_equal '<div style="">foo</div>', sanitize_html(markup)
end
end