From d8e06f6db92e5784da4b8e87a608a0209f5bddb9 Mon Sep 17 00:00:00 2001 From: Jacques Distler Date: Fri, 23 Feb 2007 13:34:58 -0600 Subject: [PATCH] Sanitize URI schemes. --- lib/sanitize.rb | 10 +++++++++- test/unit/sanitize_test.rb | 25 ++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/lib/sanitize.rb b/lib/sanitize.rb index 6f44561c..68e9fe47 100644 --- a/lib/sanitize.rb +++ b/lib/sanitize.rb @@ -104,11 +104,17 @@ module Sanitize 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin', 'stroke-opacity'] + acceptable_protocols = [ 'ed2k', 'ftp', 'http', 'https', 'irc', + 'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal', + 'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag', + 'ssh', 'sftp', 'rtsp', 'afs' ] + ALLOWED_ELEMENTS = acceptable_elements + mathml_elements + svg_elements unless defined?(ALLOWED_ELEMENTS) ALLOWED_ATTRIBUTES = acceptable_attributes + mathml_attributes + svg_attributes unless defined?(ALLOWED_ATTRIBUTES) ALLOWED_CSS_PROPERTIES = acceptable_css_properties unless defined?(ALLOWED_CSS_PROPERTIES) ALLOWED_CSS_KEYWORDS = acceptable_css_keywords unless defined?(ALLOWED_CSS_KEYWORDS) ALLOWED_SVG_PROPERTIES = acceptable_svg_properties unless defined?(ALLOWED_SVG_PROPERTIES) + ALLOWED_PROTOCOLS = acceptable_protocols unless defined?(ALLOWED_PROTOCOLS) # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style attributes are parsed, and a restricted set, @@ -133,7 +139,9 @@ module Sanitize node.attributes.delete_if { |attr,v| !ALLOWED_ATTRIBUTES.include?(attr) } %w(href src).each do |attr| val_unescaped = CGI.unescapeHTML(node.attributes[attr].to_s).gsub(/[\000-\040\177-\240]+/,'') - node.attributes.delete attr if val_unescaped =~ /^javascript:/i + if val_unescaped =~ /^\w+:/ and !ALLOWED_PROTOCOLS.include?(val_unescaped.split(':')[0]) + node.attributes.delete attr + end end if node.attributes['style'] node.attributes['style'] = sanitize_css(node.attributes['style']) diff --git a/test/unit/sanitize_test.rb b/test/unit/sanitize_test.rb index ea8defd8..eb2e9080 100644 --- a/test/unit/sanitize_test.rb +++ b/test/unit/sanitize_test.rb @@ -25,9 +25,11 @@ class SanitizeTest < Test::Unit::TestCase end Sanitize::ALLOWED_ATTRIBUTES.each do |attribute_name| - define_method "test_should_allow_#{attribute_name}_attribute" do - assert_equal "

foo <bad>bar</bad> baz

", - sanitize_html("

foo bar baz

") + if attribute_name != 'style' + define_method "test_should_allow_#{attribute_name}_attribute" do + assert_equal "

foo <bad>bar</bad> baz

", + sanitize_html("

foo bar baz

") + end end end @@ -140,4 +142,21 @@ class SanitizeTest < Test::Unit::TestCase sanitize_html('') end + def test_img_dynsrc_lowsrc + assert_equal "", + sanitize_html(%()) + assert_equal "", + sanitize_html(%()) + end + + def test_div_background_image_unicode_encoded + assert_equal '
foo
', + sanitize_html(%(
foo
)) + end + + def test_div_expression + assert_equal '
foo
', + sanitize_html(%(
foo
)) + end + end