From e7b77dd3d3cc04daf4dbb3e5abf323dd3b924831 Mon Sep 17 00:00:00 2001 From: Jacques Distler Date: Fri, 9 Oct 2009 13:02:02 -0500 Subject: [PATCH] Sanitizer Refactoring A bit of cleanup for the Sanitizer. --- lib/sanitizer.rb | 57 ++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/lib/sanitizer.rb b/lib/sanitizer.rb index 57c5591c..7052443d 100644 --- a/lib/sanitizer.rb +++ b/lib/sanitizer.rb @@ -117,9 +117,9 @@ module Sanitizer # ALLOWED_PROTOCOLS are allowed. # You can adjust what gets sanitized, by defining these constant arrays before this Module is loaded. # - # sanitize_html('') + # xhtml_sanitize('') # => <script> do_nasty_stuff() </script> - # sanitize_html('Click here for $100') + # xhtml_sanitize('Click here for $100') # => Click here for $100 def xhtml_sanitize(html) if html.index("<") @@ -131,31 +131,7 @@ module Sanitizer new_text << case node.tag? when true if ALLOWED_ELEMENTS.include?(node.name) - if node.attributes - node.attributes.delete_if { |attr,v| !ALLOWED_ATTRIBUTES.include?(attr) } - ATTR_VAL_IS_URI.each do |attr| - val_unescaped = node.attributes[attr].to_s.unescapeHTML.gsub(/`|[\000-\040\177\s]+|\302[\200-\240]/,'').downcase - if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ and !ALLOWED_PROTOCOLS.include?(val_unescaped.split(':')[0]) - node.attributes.delete attr - end - end - SVG_ATTR_VAL_ALLOWS_REF.each do |attr| - node.attributes[attr] = node.attributes[attr].to_s.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if node.attributes[attr] - end - if SVG_ALLOW_LOCAL_HREF.include?(node.name) && node.attributes['xlink:href'] && node.attributes['xlink:href'] =~ /^\s*[^#\s].*/m - node.attributes.delete 'xlink:href' - end - if node.attributes['style'] - node.attributes['style'] = sanitize_css(node.attributes['style']) - end - node.attributes.each do |attr,val| - if String === val - node.attributes[attr] = val.unescapeHTML.escapeHTML - else - node.attributes.delete attr - end - end - end + 
process_attributes_for(node) node.to_s else node.to_s.gsub(//, ">") @@ -169,7 +145,32 @@ module Sanitizer end html end - + + protected + + def process_attributes_for(node) + return unless node.attributes + node.attributes.each do |attr,val| + if String === val && ALLOWED_ATTRIBUTES.include?(attr) + val = val.unescapeHTML.escapeHTML + else + node.attributes.delete attr; next + end + if attr == 'xlink:href' && SVG_ALLOW_LOCAL_HREF.include?(node.name) && val =~ /^\s*[^#\s]/m + node.attributes.delete attr; next + end + if ATTR_VAL_IS_URI.include?(attr) + val_unescaped = val.unescapeHTML.gsub(/`|[\000-\040\177\s]+|\302[\200-\240]/,'').downcase + if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !ALLOWED_PROTOCOLS.include?(val_unescaped.split(':')[0]) + node.attributes.delete attr; next + end + end + val = val.to_s.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if SVG_ATTR_VAL_ALLOWS_REF.include?(attr) + val = sanitize_css(val) if attr == 'style' + node.attributes[attr] = val + end + end + def sanitize_css(style) # disallow urls style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')