New Sanitizer Goes Live

The new sanitizer seems to work well (cuts the time required
to produce the Instiki Atom feed in half). Our strategy is to
use HTML5lib for <nowiki> content, but to use the new sanitizer
for content that has been processed by Maruku (and hence is
well-formed).

The one unit test that now fails won't affect us (since it deals
with very malformed HTML).
This commit is contained in:
Jacques Distler 2008-05-21 02:06:31 -05:00
parent 800880f382
commit 45405fc97e
8 changed files with 24 additions and 16 deletions

View file

@ -359,7 +359,7 @@
"name": "should_sanitize_script_tag_with_multiple_open_brackets",
"input": "<<script>alert(\"XSS\");//<</script>",
"output": "&lt;&lt;script&gt;alert(\"XSS\");//&lt;&lt;/script&gt;",
"xhtml": "&lt;&lt;script&gt;alert(&quot;XSS&quot;);//&lt;&lt;/script&gt;",
"xhtml": "&lt;&lt;script&gt;alert(\"XSS\");//&lt;&lt;/script&gt;",
"rexml": "Ill-formed XHTML!"
},
@ -375,7 +375,7 @@
"name": "should_sanitize_tag_broken_up_by_null",
"input": "<scr\u0000ipt>alert(\"XSS\")</scr\u0000ipt>",
"output": "&lt;scr\ufffdipt&gt;alert(\"XSS\")&lt;/scr\ufffdipt&gt;",
"xhtml": "&lt;scr&gt;alert(&quot;XSS&quot;)&lt;/scr&gt;",
"xhtml": "&lt;scr&gt;alert(\"XSS\")&lt;/scr&gt;",
"rexml": "Ill-formed XHTML!"
},

View file

@ -18,9 +18,9 @@ class NoWikiTest < Test::Unit::TestCase
)
end
def test_no_sanitize_nowiki
def test_sanitize_nowiki
match(NoWiki, 'This sentence contains <nowiki>[[test]]&<a href="a&b">shebang</a> <script>alert("xss!");</script> *foo*</nowiki>. Do not touch!',
:plain_text => '[[test]]&<a href="a&b">shebang</a> <script>alert("xss!");</script> *foo*'
:plain_text => "[[test]]&amp;<a href='a&amp;b'>shebang</a> &lt;script&gt;alert(\"xss!\");&lt;/script&gt; *foo*"
)
end

View file

@ -85,8 +85,8 @@ class PageRendererTest < Test::Unit::TestCase
%{xmlns='http://www.w3.org/1998/Math/MathML'><mi>sin</mi><mo stretchy='false'>} +
%{(</mo><mi>x</mi><mo stretchy='false'>)</mo><semantics><annotation-xml encoding='SVG1.1'>} +
%{<svg/></annotation-xml></semantics></math><div class='maruku-eq-tex'><code style='display: none;'>} +
%{\\sin(x) \\begin{svg}<svg></svg>\\end{svg}</code></div></div>},
"$$\\sin(x) \\begin{svg}<svg></svg>\\end{svg}$$")
%{\\sin(x) \\begin{svg}<svg/>\\end{svg}</code></div></div>},
"$$\\sin(x) \\begin{svg}<svg/>\\end{svg}$$")
code_block = [
'This is a code block:',
@ -264,7 +264,7 @@ class PageRendererTest < Test::Unit::TestCase
# currently, upper case HTML elements are not allowed
assert_markup_parsed_as(
"<p>This &lt;IMG SRC=\"http://hobix.com/sample.jpg\" alt=\"\"/&gt; is an inline image link.</p>",
"<p>This &lt;IMG SRC='http://hobix.com/sample.jpg' alt=''/&gt; is an inline image link.</p>",
'This <IMG SRC="http://hobix.com/sample.jpg" alt="" /> is an inline image link.')
end

View file

@ -14,7 +14,7 @@ class SanitizerTest < Test::Unit::TestCase
end
def do_sanitize_xhtml stream
sanitize_xhtml(stream.to_utf8)
xhtml_sanitize(stream)
end
def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)