diff --git a/lib/sanitizer.rb b/lib/sanitizer.rb index bcfb143a..63eeb625 100644 --- a/lib/sanitizer.rb +++ b/lib/sanitizer.rb @@ -123,31 +123,33 @@ module Sanitizer # # xhtml_sanitize('') # => <script> do_nasty_stuff() </script> - # xhtml_sanitize_xhtml('Click here for $100') + # xhtml_sanitize('Click here for $100') # => Click here for $100 def xhtml_sanitize(html) - if html.index("<") - tokenizer = HTML::Tokenizer.new(html.to_utf8) - new_text = "" + return html unless sanitizeable?(html) + tokenizer = HTML::Tokenizer.new(html.to_utf8) + results = [] - while token = tokenizer.next - node = XHTML::Node.parse(nil, 0, 0, token, false) - new_text << case node.tag? - when true - if ALLOWED_ELEMENTS.include?(node.name) - process_attributes_for(node) - node.to_s - else - node.to_s.gsub(//, ">") - end + while token = tokenizer.next + node = XHTML::Node.parse(nil, 0, 0, token, false) + results << case node.tag? + when true + if ALLOWED_ELEMENTS.include?(node.name) + process_attributes_for(node) + node.to_s else - node.to_s.unescapeHTML.escapeHTML - end + node.to_s.gsub(//, ">") + end + else + node.to_s.unescapeHTML.escapeHTML end - - html = new_text end - html + + results.join + end + + def sanitizeable?(text) + !(text.nil? || text.empty? || !text.index("<")) end protected diff --git a/test/unit/page_renderer_test.rb b/test/unit/page_renderer_test.rb index d50f9322..8f2ba29a 100644 --- a/test/unit/page_renderer_test.rb +++ b/test/unit/page_renderer_test.rb @@ -373,10 +373,10 @@ END_THM assert_markup_parsed_as( "

should we go " + "That Way or

\n
" + - "(1)ThisWay" + "ThisWay" + - "(1)
", + "", "should we go ThatWay or \n\\[ThisWay\\]\n") assert_markup_parsed_as( @@ -393,7 +393,7 @@ END_THM "That Way or

\n
" + "ThisWay$" + - "100 ThatWay" + + "100ThatWay" + "ThisWay \\$100 " + "ThatWay
", "should we go ThatWay or \n$$ThisWay \\$100 ThatWay $$\n")