More Sanitizer Refactoring
Make the Sanitizer more efficient. Also, update some unit tests.
This commit is contained in:
parent
9b7071d190
commit
d14db51d9e
|
@ -123,31 +123,33 @@ module Sanitizer
|
||||||
#
|
#
|
||||||
# xhtml_sanitize('<script> do_nasty_stuff() </script>')
|
# xhtml_sanitize('<script> do_nasty_stuff() </script>')
|
||||||
# => &lt;script&gt; do_nasty_stuff() &lt;/script&gt;
|
# => &lt;script&gt; do_nasty_stuff() &lt;/script&gt;
|
||||||
# xhtml_sanitize_xhtml('<a href="javascript: sucker();">Click here for $100</a>')
|
# xhtml_sanitize('<a href="javascript: sucker();">Click here for $100</a>')
|
||||||
# => <a>Click here for $100</a>
|
# => <a>Click here for $100</a>
|
||||||
def xhtml_sanitize(html)
|
def xhtml_sanitize(html)
|
||||||
if html.index("<")
|
return html unless sanitizeable?(html)
|
||||||
tokenizer = HTML::Tokenizer.new(html.to_utf8)
|
tokenizer = HTML::Tokenizer.new(html.to_utf8)
|
||||||
new_text = ""
|
results = []
|
||||||
|
|
||||||
while token = tokenizer.next
|
while token = tokenizer.next
|
||||||
node = XHTML::Node.parse(nil, 0, 0, token, false)
|
node = XHTML::Node.parse(nil, 0, 0, token, false)
|
||||||
new_text << case node.tag?
|
results << case node.tag?
|
||||||
when true
|
when true
|
||||||
if ALLOWED_ELEMENTS.include?(node.name)
|
if ALLOWED_ELEMENTS.include?(node.name)
|
||||||
process_attributes_for(node)
|
process_attributes_for(node)
|
||||||
node.to_s
|
node.to_s
|
||||||
else
|
|
||||||
node.to_s.gsub(/</, "&lt;").gsub(/>/, "&gt;")
|
|
||||||
end
|
|
||||||
else
|
else
|
||||||
node.to_s.unescapeHTML.escapeHTML
|
node.to_s.gsub(/</, "&lt;").gsub(/>/, "&gt;")
|
||||||
end
|
end
|
||||||
|
else
|
||||||
|
node.to_s.unescapeHTML.escapeHTML
|
||||||
end
|
end
|
||||||
|
|
||||||
html = new_text
|
|
||||||
end
|
end
|
||||||
html
|
|
||||||
|
results.join
|
||||||
|
end
|
||||||
|
|
||||||
|
def sanitizeable?(text)
|
||||||
|
!(text.nil? || text.empty? || !text.index("<"))
|
||||||
end
|
end
|
||||||
|
|
||||||
protected
|
protected
|
||||||
|
|
|
@ -373,10 +373,10 @@ END_THM
|
||||||
assert_markup_parsed_as(
|
assert_markup_parsed_as(
|
||||||
"<p>should we go <a class='existingWikiWord' href='../show/ThatWay'>" +
|
"<p>should we go <a class='existingWikiWord' href='../show/ThatWay'>" +
|
||||||
"That Way</a> or</p>\n<div class='maruku-equation' id='eq:eq1'>" +
|
"That Way</a> or</p>\n<div class='maruku-equation' id='eq:eq1'>" +
|
||||||
"<math class='maruku-mathml' display='block' " +
|
"<span class='maruku-eq-number'>(1)</span><math class='maruku-mathml' display='block' " +
|
||||||
"xmlns='http://www.w3.org/1998/Math/MathML'><mi>ThisWay</mi></math>" +
|
"xmlns='http://www.w3.org/1998/Math/MathML'><mi>ThisWay</mi></math>" +
|
||||||
"<span class='maruku-eq-tex'><code style='display: none;'>ThisWay</code>" +
|
"<span class='maruku-eq-tex'><code style='display: none;'>ThisWay</code>" +
|
||||||
"</span><span class='maruku-eq-number'>(1)</span></div>",
|
"</span></div>",
|
||||||
"should we go ThatWay or \n\\[ThisWay\\]\n")
|
"should we go ThatWay or \n\\[ThisWay\\]\n")
|
||||||
|
|
||||||
assert_markup_parsed_as(
|
assert_markup_parsed_as(
|
||||||
|
@ -393,7 +393,7 @@ END_THM
|
||||||
"That Way</a> or</p>\n<div class='maruku-equation'>" +
|
"That Way</a> or</p>\n<div class='maruku-equation'>" +
|
||||||
"<math class='maruku-mathml' display='block' " +
|
"<math class='maruku-mathml' display='block' " +
|
||||||
"xmlns='http://www.w3.org/1998/Math/MathML'><mi>ThisWay</mi><mi>$</mi>" +
|
"xmlns='http://www.w3.org/1998/Math/MathML'><mi>ThisWay</mi><mi>$</mi>" +
|
||||||
"<mn>100 </mn><mi>ThatWay</mi></math>" +
|
"<mn>100</mn><mi>ThatWay</mi></math>" +
|
||||||
"<span class='maruku-eq-tex'><code style='display: none;'>ThisWay \\$100 " +
|
"<span class='maruku-eq-tex'><code style='display: none;'>ThisWay \\$100 " +
|
||||||
"ThatWay</code></span></div>",
|
"ThatWay</code></span></div>",
|
||||||
"should we go ThatWay or \n$$ThisWay \\$100 ThatWay $$\n")
|
"should we go ThatWay or \n$$ThisWay \\$100 ThatWay $$\n")
|
||||||
|
|
Loading…
Reference in a new issue