More lenient URI scheme matching in sanitize.

This commit is contained in:
Jacques Distler 2007-02-24 22:47:31 -06:00
parent f9dcfa5af0
commit 507a17aade
2 changed files with 15 additions and 2 deletions

View file

@ -143,8 +143,8 @@ module Sanitize
if node.closing != :close
node.attributes.delete_if { |attr,v| !ALLOWED_ATTRIBUTES.include?(attr) }
ATTR_VAL_IS_URI.each do |attr|
val_unescaped = CGI.unescapeHTML(node.attributes[attr].to_s).gsub(/[\000-\040\177-\240]+/,'')
if val_unescaped =~ /^\w+:/ and !ALLOWED_PROTOCOLS.include?(val_unescaped.split(':')[0])
val_unescaped = CGI.unescapeHTML(node.attributes[attr].to_s).gsub(/[\000-\040\177-\240]+/,'').downcase
if val_unescaped =~ /^[+-.\w]+:/ and !ALLOWED_PROTOCOLS.include?(val_unescaped.split(':')[0])
node.attributes.delete attr
end
end

View file

@ -47,11 +47,24 @@ class SanitizeTest < Test::Unit::TestCase
end
end
Sanitize::ALLOWED_PROTOCOLS.each do |protocol|
define_method "test_should_allow_uppercase_#{protocol}_uris" do
assert_equal "<a href=\"#{protocol.upcase}\">foo</a>",
sanitize_html(%(<a href="#{protocol.upcase}">foo</a>))
end
end
def test_should_allow_anchors
assert_equal "<a href=\"foo\">&lt;script>baz&lt;/script></a>",
sanitize_html("<a href='foo' onclick='bar'><script>baz</script></a>")
end
# RFC 3986, sec 4.2
def test_allow_colons_in_path_component
assert_equal "<a href=\"./this:that\">foo</a>",
sanitize_html("<a href=\"./this:that\">foo</a>")
end
%w(src width height alt).each do |img_attr|
define_method "test_should_allow_image_#{img_attr}_attribute" do
assert_equal "<img #{img_attr}=\"foo\" />",