diff --git a/lib/sanitizer.rb b/lib/sanitizer.rb index 01391232..aa886ab6 100644 --- a/lib/sanitizer.rb +++ b/lib/sanitizer.rb @@ -133,7 +133,7 @@ module Sanitizer if node.attributes node.attributes.delete_if { |attr,v| !ALLOWED_ATTRIBUTES.include?(attr) } ATTR_VAL_IS_URI.each do |attr| - val_unescaped = CGI.unescapeHTML(node.attributes[attr].to_s).gsub(/`|[\000-\040\177\s\200-\240]/,'').downcase + val_unescaped = node.attributes[attr].to_s.unescapeHTML.gsub(/`|[\000-\040\177\s]+|\302[\200-\240]/,'').downcase if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ and !ALLOWED_PROTOCOLS.include?(val_unescaped.split(':')[0]) node.attributes.delete attr end diff --git a/lib/stringsupport.rb b/lib/stringsupport.rb index 29b7206e..0c490f47 100644 --- a/lib/stringsupport.rb +++ b/lib/stringsupport.rb @@ -2243,7 +2243,7 @@ class String end when /\A#x([0-9a-f]+)\z/ni then if $1.hex < 256 - $1.hex.chr + [$1.hex].pack("U") else if $1.hex < 1114111 [$1.hex].pack("U") diff --git a/test/sanitizer.dat b/test/sanitizer.dat index 078becfa..064fa5cc 100644 --- a/test/sanitizer.dat +++ b/test/sanitizer.dat @@ -470,6 +470,47 @@ "rexml": "<image src=\"foo\"></image>", "xhtml": "<image src='foo'/>", "output": "<image src=\"foo\"/>" - } + }, + { + "name": "style_attr_end_with_nothing", + "input": "
", + "output": "
", + "rexml": "
" + }, + + { + "name": "style_attr_end_with_space", + "input": "
", + "output": "
", + "rexml": "
" + }, + + { + "name": "style_attr_end_with_semicolon", + "input": "
", + "output": "
", + "rexml": "
" + }, + + { + "name": "style_attr_end_with_semicolon_space", + "input": "
", + "output": "
", + "rexml": "
" + }, + + { + "name": "attributes_with_embedded_quotes", + "input": "", + "output": "", + "rexml": "Ill-formed XHTML!" + }, + + { + "name": "attributes_with_embedded_quotes_II", + "input": "", + "output": "", + "rexml": "Ill-formed XHTML!" + } ] diff --git a/vendor/plugins/HTML5lib/testdata/sanitizer/tests1.dat b/vendor/plugins/HTML5lib/testdata/sanitizer/tests1.dat index 73de161a..7323b17d 100644 --- a/vendor/plugins/HTML5lib/testdata/sanitizer/tests1.dat +++ b/vendor/plugins/HTML5lib/testdata/sanitizer/tests1.dat @@ -451,5 +451,51 @@ "input": "", "rexml": "<image src=\"foo\"></image>", "output": "<image src=\"foo\"/>" + }, + + { + "name": "style_attr_end_with_nothing", + "input": "
", + "output": "
", + "xhtml": "
", + "rexml": "
" + }, + + { + "name": "style_attr_end_with_space", + "input": "
", + "output": "
", + "xhtml": "
", + "rexml": "
" + }, + + { + "name": "style_attr_end_with_semicolon", + "input": "
", + "output": "
", + "xhtml": "
", + "rexml": "
" + }, + + { + "name": "style_attr_end_with_semicolon_space", + "input": "
", + "output": "
", + "xhtml": "
", + "rexml": "
" + }, + + { + "name": "attributes_with_embedded_quotes", + "input": "", + "output": "", + "rexml": "Ill-formed XHTML!" + }, + + { + "name": "attributes_with_embedded_quotes_II", + "input": "", + "output": "", + "rexml": "Ill-formed XHTML!" } ]