From 507a17aade89c1adc4b540c8611a3a68fcdcf102 Mon Sep 17 00:00:00 2001
From: Jacques Distler
Date: Sat, 24 Feb 2007 22:47:31 -0600
Subject: [PATCH] More lenient URI scheme matching in sanitize.

---
Review notes (this section is not part of the commit message):

  * lib/sanitize.rb: each URI-valued attribute is HTML-unescaped, stripped
    of the byte ranges \000-\040 and \177-\240, and downcased before the
    scheme check, so the downcased scheme is what gets looked up in
    ALLOWED_PROTOCOLS.  Only the local copy is normalized; the attribute
    itself is either kept untouched or deleted.
  * The scheme pattern is written [+\-.\w] with the hyphen escaped.  In the
    unescaped form [+-.\w], "+-." is a range from "+" to "." that also
    admits ",", which RFC 3986 sec 3.1 does not allow in a scheme.
  * NOTE(review): several string literals in the test hunk look as if their
    HTML markup was lost in transit; confirm against the repository before
    applying.

 lib/sanitize.rb            |  4 ++--
 test/unit/sanitize_test.rb | 13 +++++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/lib/sanitize.rb b/lib/sanitize.rb
index e544f86f..ebc045e0 100644
--- a/lib/sanitize.rb
+++ b/lib/sanitize.rb
@@ -143,8 +143,8 @@ module Sanitize
           if node.closing != :close
             node.attributes.delete_if { |attr,v| !ALLOWED_ATTRIBUTES.include?(attr) }
             ATTR_VAL_IS_URI.each do |attr|
-              val_unescaped = CGI.unescapeHTML(node.attributes[attr].to_s).gsub(/[\000-\040\177-\240]+/,'')
-              if val_unescaped =~ /^\w+:/ and !ALLOWED_PROTOCOLS.include?(val_unescaped.split(':')[0])
+              val_unescaped = CGI.unescapeHTML(node.attributes[attr].to_s).gsub(/[\000-\040\177-\240]+/,'').downcase
+              if val_unescaped =~ /^[+\-.\w]+:/ and !ALLOWED_PROTOCOLS.include?(val_unescaped.split(':')[0])
                 node.attributes.delete attr
               end
             end
diff --git a/test/unit/sanitize_test.rb b/test/unit/sanitize_test.rb
index db0d4bc2..9ac2247e 100644
--- a/test/unit/sanitize_test.rb
+++ b/test/unit/sanitize_test.rb
@@ -47,11 +47,24 @@ class SanitizeTest < Test::Unit::TestCase
     end
   end
 
+  Sanitize::ALLOWED_PROTOCOLS.each do |protocol|
+    define_method "test_should_allow_uppercase_#{protocol}_uris" do
+      assert_equal "foo",
+        sanitize_html(%(foo))
+    end
+  end
+
   def test_should_allow_anchors
     assert_equal "<script>baz</script>",
      sanitize_html("")
   end
 
+  # RFC 3986, sec 4.2
+  def test_allow_colons_in_path_component
+    assert_equal "foo",
+      sanitize_html("foo")
+  end
+
   %w(src width height alt).each do |img_attr|
     define_method "test_should_allow_image_#{img_attr}_attribute" do
       assert_equal "",