New Sanitizer Goes Live
The new sanitizer seems to work well (cuts the time required to produce the Instiki Atom feed in half). Our strategy is to use HTML5lib for <nowiki> content, but to use the new sanitizer for content that has been processed by Maruku (and hence is well-formed). The one broken unit test won't affect us (since it dealt with very malformed HTML).
This commit is contained in:
parent
800880f382
commit
45405fc97e
|
@ -16,6 +16,9 @@ require 'chunks/chunk'
|
||||||
|
|
||||||
class NoWiki < Chunk::Abstract
|
class NoWiki < Chunk::Abstract
|
||||||
|
|
||||||
|
require 'sanitize'
|
||||||
|
include Sanitize
|
||||||
|
|
||||||
NOWIKI_PATTERN = Regexp.new('<nowiki>(.*?)</nowiki>', Regexp::MULTILINE)
|
NOWIKI_PATTERN = Regexp.new('<nowiki>(.*?)</nowiki>', Regexp::MULTILINE)
|
||||||
def self.pattern() NOWIKI_PATTERN end
|
def self.pattern() NOWIKI_PATTERN end
|
||||||
|
|
||||||
|
@ -23,7 +26,7 @@ class NoWiki < Chunk::Abstract
|
||||||
|
|
||||||
def initialize(match_data, content)
|
def initialize(match_data, content)
|
||||||
super
|
super
|
||||||
@plain_text = @unmask_text = match_data[1]
|
@plain_text = @unmask_text = sanitize_xhtml(match_data[1])
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -120,7 +120,7 @@ module Sanitizer
|
||||||
# => &lt;script&gt; do_nasty_stuff() &lt;/script&gt;
|
# => &lt;script&gt; do_nasty_stuff() &lt;/script&gt;
|
||||||
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
|
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
|
||||||
# => <a>Click here for $100</a>
|
# => <a>Click here for $100</a>
|
||||||
def sanitize_xhtml(html)
|
def xhtml_sanitize(html)
|
||||||
if html.index("<")
|
if html.index("<")
|
||||||
tokenizer = HTML::Tokenizer.new(html.to_utf8)
|
tokenizer = HTML::Tokenizer.new(html.to_utf8)
|
||||||
new_text = ""
|
new_text = ""
|
||||||
|
@ -149,7 +149,7 @@ module Sanitizer
|
||||||
end
|
end
|
||||||
node.attributes.each do |attr,val|
|
node.attributes.each do |attr,val|
|
||||||
if String === val
|
if String === val
|
||||||
node.attributes[attr] = CGI.escapeHTML(val.unescapeHTML)
|
node.attributes[attr] = CGI.escapeHTML(CGI.unescapeHTML(val))
|
||||||
else
|
else
|
||||||
node.attributes.delete attr
|
node.attributes.delete attr
|
||||||
end
|
end
|
||||||
|
@ -160,7 +160,7 @@ module Sanitizer
|
||||||
node.to_s.gsub(/</, "&lt;").gsub(/>/, "&gt;")
|
node.to_s.gsub(/</, "&lt;").gsub(/>/, "&gt;")
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
CGI.escapeHTML(node.to_s.unescapeHTML)
|
node.to_s.unescapeHTML.escapeHTML
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -2211,12 +2211,17 @@ class String
|
||||||
|
|
||||||
#:stopdoc:
|
#:stopdoc:
|
||||||
|
|
||||||
|
def escapeHTML
|
||||||
|
self.gsub( /&/, "&amp;" ).
|
||||||
|
gsub( /</, "&lt;" ).
|
||||||
|
gsub( />/, "&gt;" )
|
||||||
|
end
|
||||||
|
|
||||||
def unescapeHTML
|
def unescapeHTML
|
||||||
self.gsub(/&(.*?);/n) do
|
self.gsub(/&(.*?);/n) do
|
||||||
match = $1.dup
|
match = $1.dup
|
||||||
case match
|
case match
|
||||||
when /\Aamp\z/ni then '&'
|
when /\Aamp\z/ni then '&'
|
||||||
when /\Aquot\z/ni then '"'
|
|
||||||
when /\Agt\z/ni then '>'
|
when /\Agt\z/ni then '>'
|
||||||
when /\Alt\z/ni then '<'
|
when /\Alt\z/ni then '<'
|
||||||
when /\A#0*(\d+)\z/n then
|
when /\A#0*(\d+)\z/n then
|
||||||
|
|
|
@ -5,7 +5,6 @@ require_dependency 'chunks/include'
|
||||||
require_dependency 'chunks/wiki'
|
require_dependency 'chunks/wiki'
|
||||||
require_dependency 'chunks/literal'
|
require_dependency 'chunks/literal'
|
||||||
require 'chunks/nowiki'
|
require 'chunks/nowiki'
|
||||||
require 'sanitize'
|
|
||||||
|
|
||||||
# Wiki content is just a string that can process itself with a chain of
|
# Wiki content is just a string that can process itself with a chain of
|
||||||
# actions. The actions can modify wiki content so that certain parts of
|
# actions. The actions can modify wiki content so that certain parts of
|
||||||
|
@ -113,8 +112,9 @@ end
|
||||||
|
|
||||||
class WikiContent < String
|
class WikiContent < String
|
||||||
|
|
||||||
|
require 'sanitizer'
|
||||||
include ChunkManager
|
include ChunkManager
|
||||||
include Sanitize
|
include Sanitizer
|
||||||
|
|
||||||
DEFAULT_OPTS = {
|
DEFAULT_OPTS = {
|
||||||
:active_chunks => ACTIVE_CHUNKS,
|
:active_chunks => ACTIVE_CHUNKS,
|
||||||
|
@ -193,7 +193,7 @@ class WikiContent < String
|
||||||
chunk.unmask_text
|
chunk.unmask_text
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
self.replace sanitize_xhtml(self)
|
self.replace xhtml_sanitize(self)
|
||||||
end
|
end
|
||||||
|
|
||||||
def page_name
|
def page_name
|
||||||
|
|
|
@ -359,7 +359,7 @@
|
||||||
"name": "should_sanitize_script_tag_with_multiple_open_brackets",
|
"name": "should_sanitize_script_tag_with_multiple_open_brackets",
|
||||||
"input": "<<script>alert(\"XSS\");//<</script>",
|
"input": "<<script>alert(\"XSS\");//<</script>",
|
||||||
"output": "&lt;&lt;script&gt;alert(\"XSS\");//&lt;&lt;/script&gt;",
|
"output": "&lt;&lt;script&gt;alert(\"XSS\");//&lt;&lt;/script&gt;",
|
||||||
"xhtml": "&lt;&lt;script&gt;alert(&quot;XSS&quot;);//&lt;&lt;/script&gt;",
|
"xhtml": "&lt;&lt;script&gt;alert(\"XSS\");//&lt;&lt;/script&gt;",
|
||||||
"rexml": "Ill-formed XHTML!"
|
"rexml": "Ill-formed XHTML!"
|
||||||
},
|
},
|
||||||
|
|
||||||
|
@ -375,7 +375,7 @@
|
||||||
"name": "should_sanitize_tag_broken_up_by_null",
|
"name": "should_sanitize_tag_broken_up_by_null",
|
||||||
"input": "<scr\u0000ipt>alert(\"XSS\")</scr\u0000ipt>",
|
"input": "<scr\u0000ipt>alert(\"XSS\")</scr\u0000ipt>",
|
||||||
"output": "&lt;scr\ufffdipt&gt;alert(\"XSS\")&lt;/scr\ufffdipt&gt;",
|
"output": "&lt;scr\ufffdipt&gt;alert(\"XSS\")&lt;/scr\ufffdipt&gt;",
|
||||||
"xhtml": "&lt;scr&gt;alert(&quot;XSS&quot;)&lt;/scr&gt;",
|
"xhtml": "&lt;scr&gt;alert(\"XSS\")&lt;/scr&gt;",
|
||||||
"rexml": "Ill-formed XHTML!"
|
"rexml": "Ill-formed XHTML!"
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
|
@ -18,9 +18,9 @@ class NoWikiTest < Test::Unit::TestCase
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_no_sanitize_nowiki
|
def test_sanitize_nowiki
|
||||||
match(NoWiki, 'This sentence contains <nowiki>[[test]]&<a href="a&b">shebang</a> <script>alert("xss!");</script> *foo*</nowiki>. Do not touch!',
|
match(NoWiki, 'This sentence contains <nowiki>[[test]]&<a href="a&b">shebang</a> <script>alert("xss!");</script> *foo*</nowiki>. Do not touch!',
|
||||||
:plain_text => '[[test]]&<a href="a&b">shebang</a> <script>alert("xss!");</script> *foo*'
|
:plain_text => "[[test]]&amp;<a href='a&amp;b'>shebang</a> &lt;script&gt;alert(\"xss!\");&lt;/script&gt; *foo*"
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -85,8 +85,8 @@ class PageRendererTest < Test::Unit::TestCase
|
||||||
%{xmlns='http://www.w3.org/1998/Math/MathML'><mi>sin</mi><mo stretchy='false'>} +
|
%{xmlns='http://www.w3.org/1998/Math/MathML'><mi>sin</mi><mo stretchy='false'>} +
|
||||||
%{(</mo><mi>x</mi><mo stretchy='false'>)</mo><semantics><annotation-xml encoding='SVG1.1'>} +
|
%{(</mo><mi>x</mi><mo stretchy='false'>)</mo><semantics><annotation-xml encoding='SVG1.1'>} +
|
||||||
%{<svg/></annotation-xml></semantics></math><div class='maruku-eq-tex'><code style='display: none;'>} +
|
%{<svg/></annotation-xml></semantics></math><div class='maruku-eq-tex'><code style='display: none;'>} +
|
||||||
%{\\sin(x) \\begin{svg}<svg></svg>\\end{svg}</code></div></div>},
|
%{\\sin(x) \\begin{svg}<svg/>\\end{svg}</code></div></div>},
|
||||||
"$$\\sin(x) \\begin{svg}<svg></svg>\\end{svg}$$")
|
"$$\\sin(x) \\begin{svg}<svg/>\\end{svg}$$")
|
||||||
|
|
||||||
code_block = [
|
code_block = [
|
||||||
'This is a code block:',
|
'This is a code block:',
|
||||||
|
@ -264,7 +264,7 @@ class PageRendererTest < Test::Unit::TestCase
|
||||||
|
|
||||||
# currently, upper case HTML elements are not allowed
|
# currently, upper case HTML elements are not allowed
|
||||||
assert_markup_parsed_as(
|
assert_markup_parsed_as(
|
||||||
"<p>This &lt;IMG SRC=\"http://hobix.com/sample.jpg\" alt=\"\"/&gt; is an inline image link.</p>",
|
"<p>This &lt;IMG SRC='http://hobix.com/sample.jpg' alt=''/&gt; is an inline image link.</p>",
|
||||||
'This <IMG SRC="http://hobix.com/sample.jpg" alt="" /> is an inline image link.')
|
'This <IMG SRC="http://hobix.com/sample.jpg" alt="" /> is an inline image link.')
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -14,7 +14,7 @@ class SanitizerTest < Test::Unit::TestCase
|
||||||
end
|
end
|
||||||
|
|
||||||
def do_sanitize_xhtml stream
|
def do_sanitize_xhtml stream
|
||||||
sanitize_xhtml(stream.to_utf8)
|
xhtml_sanitize(stream)
|
||||||
end
|
end
|
||||||
|
|
||||||
def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
|
def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
|
||||||
|
|
Loading…
Reference in a new issue