More Sanitizer Refactoring

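Make the Sanitizer more efficient: xhtml_sanitize now returns its input unchanged when it is nil, empty, or contains no "<", and collects the processed tokens in an array that is joined once at the end.
Also, update some unit tests: the expected markup now places the equation-number span before the <math> element and drops the trailing space inside <mn>100</mn>.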
This commit is contained in:
Jacques Distler 2009-10-09 23:18:17 -05:00
parent 9b7071d190
commit d14db51d9e
2 changed files with 24 additions and 22 deletions

View file

@ -123,31 +123,33 @@ module Sanitizer
# #
# xhtml_sanitize('<script> do_nasty_stuff() </script>') # xhtml_sanitize('<script> do_nasty_stuff() </script>')
# => &lt;script> do_nasty_stuff() &lt;/script> # => &lt;script> do_nasty_stuff() &lt;/script>
# xhtml_sanitize_xhtml('<a href="javascript: sucker();">Click here for $100</a>') # xhtml_sanitize('<a href="javascript: sucker();">Click here for $100</a>')
# => <a>Click here for $100</a> # => <a>Click here for $100</a>
def xhtml_sanitize(html) def xhtml_sanitize(html)
if html.index("<") return html unless sanitizeable?(html)
tokenizer = HTML::Tokenizer.new(html.to_utf8) tokenizer = HTML::Tokenizer.new(html.to_utf8)
new_text = "" results = []
while token = tokenizer.next while token = tokenizer.next
node = XHTML::Node.parse(nil, 0, 0, token, false) node = XHTML::Node.parse(nil, 0, 0, token, false)
new_text << case node.tag? results << case node.tag?
when true when true
if ALLOWED_ELEMENTS.include?(node.name) if ALLOWED_ELEMENTS.include?(node.name)
process_attributes_for(node) process_attributes_for(node)
node.to_s node.to_s
else
node.to_s.gsub(/</, "&lt;").gsub(/>/, "&gt;")
end
else else
node.to_s.unescapeHTML.escapeHTML node.to_s.gsub(/</, "&lt;").gsub(/>/, "&gt;")
end end
else
node.to_s.unescapeHTML.escapeHTML
end end
html = new_text
end end
html
results.join
end
def sanitizeable?(text)
!(text.nil? || text.empty? || !text.index("<"))
end end
protected protected

View file

@ -373,10 +373,10 @@ END_THM
assert_markup_parsed_as( assert_markup_parsed_as(
"<p>should we go <a class='existingWikiWord' href='../show/ThatWay'>" + "<p>should we go <a class='existingWikiWord' href='../show/ThatWay'>" +
"That Way</a> or</p>\n<div class='maruku-equation' id='eq:eq1'>" + "That Way</a> or</p>\n<div class='maruku-equation' id='eq:eq1'>" +
"<math class='maruku-mathml' display='block' " + "<span class='maruku-eq-number'>(1)</span><math class='maruku-mathml' display='block' " +
"xmlns='http://www.w3.org/1998/Math/MathML'><mi>ThisWay</mi></math>" + "xmlns='http://www.w3.org/1998/Math/MathML'><mi>ThisWay</mi></math>" +
"<span class='maruku-eq-tex'><code style='display: none;'>ThisWay</code>" + "<span class='maruku-eq-tex'><code style='display: none;'>ThisWay</code>" +
"</span><span class='maruku-eq-number'>(1)</span></div>", "</span></div>",
"should we go ThatWay or \n\\[ThisWay\\]\n") "should we go ThatWay or \n\\[ThisWay\\]\n")
assert_markup_parsed_as( assert_markup_parsed_as(
@ -393,7 +393,7 @@ END_THM
"That Way</a> or</p>\n<div class='maruku-equation'>" + "That Way</a> or</p>\n<div class='maruku-equation'>" +
"<math class='maruku-mathml' display='block' " + "<math class='maruku-mathml' display='block' " +
"xmlns='http://www.w3.org/1998/Math/MathML'><mi>ThisWay</mi><mi>$</mi>" + "xmlns='http://www.w3.org/1998/Math/MathML'><mi>ThisWay</mi><mi>$</mi>" +
"<mn>100 </mn><mi>ThatWay</mi></math>" + "<mn>100</mn><mi>ThatWay</mi></math>" +
"<span class='maruku-eq-tex'><code style='display: none;'>ThisWay \\$100 " + "<span class='maruku-eq-tex'><code style='display: none;'>ThisWay \\$100 " +
"ThatWay</code></span></div>", "ThatWay</code></span></div>",
"should we go ThatWay or \n$$ThisWay \\$100 ThatWay $$\n") "should we go ThatWay or \n$$ThisWay \\$100 ThatWay $$\n")