From d14db51d9e7aa762102841e320cdb2c6156419c5 Mon Sep 17 00:00:00 2001 From: Jacques Distler Date: Fri, 9 Oct 2009 23:18:17 -0500 Subject: [PATCH] More Sanitizer Refactoring Make the Sanitizer more efficient. Also, update some unit tests. --- lib/sanitizer.rb | 40 +++++++++++++++++---------------- test/unit/page_renderer_test.rb | 6 ++--- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/lib/sanitizer.rb b/lib/sanitizer.rb index bcfb143a..63eeb625 100644 --- a/lib/sanitizer.rb +++ b/lib/sanitizer.rb @@ -123,31 +123,33 @@ module Sanitizer # # xhtml_sanitize('') # => <script> do_nasty_stuff() </script> - # xhtml_sanitize_xhtml('Click here for $100') + # xhtml_sanitize('Click here for $100') # => Click here for $100 def xhtml_sanitize(html) - if html.index("<") - tokenizer = HTML::Tokenizer.new(html.to_utf8) - new_text = "" + return html unless sanitizeable?(html) + tokenizer = HTML::Tokenizer.new(html.to_utf8) + results = [] - while token = tokenizer.next - node = XHTML::Node.parse(nil, 0, 0, token, false) - new_text << case node.tag? - when true - if ALLOWED_ELEMENTS.include?(node.name) - process_attributes_for(node) - node.to_s - else - node.to_s.gsub(//, ">") - end + while token = tokenizer.next + node = XHTML::Node.parse(nil, 0, 0, token, false) + results << case node.tag? + when true + if ALLOWED_ELEMENTS.include?(node.name) + process_attributes_for(node) + node.to_s else - node.to_s.unescapeHTML.escapeHTML - end + node.to_s.gsub(//, ">") + end + else + node.to_s.unescapeHTML.escapeHTML end - - html = new_text end - html + + results.join + end + + def sanitizeable?(text) + !(text.nil? || text.empty? || !text.index("<")) end protected diff --git a/test/unit/page_renderer_test.rb b/test/unit/page_renderer_test.rb index d50f9322..8f2ba29a 100644 --- a/test/unit/page_renderer_test.rb +++ b/test/unit/page_renderer_test.rb @@ -373,10 +373,10 @@ END_THM assert_markup_parsed_as( "

should we go " + "That Way or

\n
" + - "(1)ThisWay" + "ThisWay" + - "(1)
", + "", "should we go ThatWay or \n\\[ThisWay\\]\n") assert_markup_parsed_as( @@ -393,7 +393,7 @@ END_THM "That Way or

\n
" + "ThisWay$" + - "100 ThatWay" + + "100ThatWay" + "ThisWay \\$100 " + "ThatWay
", "should we go ThatWay or \n$$ThisWay \\$100 ThatWay $$\n")