diff --git a/lib/chunks/nowiki.rb b/lib/chunks/nowiki.rb index db67c847..23f97f6f 100644 --- a/lib/chunks/nowiki.rb +++ b/lib/chunks/nowiki.rb @@ -16,6 +16,9 @@ require 'chunks/chunk' class NoWiki < Chunk::Abstract + require 'sanitize' + include Sanitize + NOWIKI_PATTERN = Regexp.new('(.*?)', Regexp::MULTILINE) def self.pattern() NOWIKI_PATTERN end @@ -23,7 +26,7 @@ class NoWiki < Chunk::Abstract def initialize(match_data, content) super - @plain_text = @unmask_text = match_data[1] + @plain_text = @unmask_text = sanitize_xhtml(match_data[1]) end end diff --git a/lib/sanitizer.rb b/lib/sanitizer.rb index adc3f3bb..031ee465 100644 --- a/lib/sanitizer.rb +++ b/lib/sanitizer.rb @@ -120,7 +120,7 @@ module Sanitizer # => <script> do_nasty_stuff() </script> # sanitize_html('Click here for $100') # => Click here for $100 - def sanitize_xhtml(html) + def xhtml_sanitize(html) if html.index("<") tokenizer = HTML::Tokenizer.new(html.to_utf8) new_text = "" @@ -149,7 +149,7 @@ module Sanitizer end node.attributes.each do |attr,val| if String === val - node.attributes[attr] = CGI.escapeHTML(val.unescapeHTML) + node.attributes[attr] = CGI.escapeHTML(CGI.unescapeHTML(val)) else node.attributes.delete attr end @@ -160,7 +160,7 @@ module Sanitizer node.to_s.gsub(//, ">") end else - CGI.escapeHTML(node.to_s.unescapeHTML) + node.to_s.unescapeHTML.escapeHTML end end diff --git a/lib/stringsupport.rb b/lib/stringsupport.rb index aa076622..b938dfef 100644 --- a/lib/stringsupport.rb +++ b/lib/stringsupport.rb @@ -2211,12 +2211,17 @@ class String #:stopdoc: + def escapeHTML + self.gsub( /&/, "&" ). + gsub( //, ">" ) + end + def unescapeHTML self.gsub(/&(.*?);/n) do match = $1.dup case match when /\Aamp\z/ni then '&' - when /\Aquot\z/ni then '"' when /\Agt\z/ni then '>' when /\Alt\z/ni then '<' when /\A#0*(\d+)\z/n then diff --git a/lib/wiki_content.rb b/lib/wiki_content.rb index 521e5b4f..b72b20b3 100644 --- a/lib/wiki_content.rb +++ b/lib/wiki_content.rb @@ -5,7 +5,6 @@ require_dependency 'chunks/include' require_dependency 'chunks/wiki' require_dependency 'chunks/literal' require 'chunks/nowiki' -require 'sanitize' # Wiki content is just a string that can process itself with a chain of # actions. The actions can modify wiki content so that certain parts of @@ -113,8 +112,9 @@ end class WikiContent < String + require 'sanitizer' include ChunkManager - include Sanitize + include Sanitizer DEFAULT_OPTS = { :active_chunks => ACTIVE_CHUNKS, @@ -193,7 +193,7 @@ class WikiContent < String chunk.unmask_text end end - self.replace sanitize_xhtml(self) + self.replace xhtml_sanitize(self) end def page_name diff --git a/test/sanitizer.dat b/test/sanitizer.dat index ec781cb9..2929ca9c 100644 --- a/test/sanitizer.dat +++ b/test/sanitizer.dat @@ -359,7 +359,7 @@ "name": "should_sanitize_script_tag_with_multiple_open_brackets", "input": "<", "output": "<<script>alert(\"XSS\");//<</script>", - "xhtml": "<<script>alert("XSS");//<</script>", + "xhtml": "<<script>alert(\"XSS\");//<</script>", "rexml": "Ill-formed XHTML!" }, @@ -375,7 +375,7 @@ "name": "should_sanitize_tag_broken_up_by_null", "input": "alert(\"XSS\")", "output": "<scr\ufffdipt>alert(\"XSS\")</scr\ufffdipt>", - "xhtml": "<scr>alert("XSS")</scr>", + "xhtml": "<scr>alert(\"XSS\")</scr>", "rexml": "Ill-formed XHTML!" }, diff --git a/test/unit/chunks/nowiki_test.rb b/test/unit/chunks/nowiki_test.rb index fd3b40cc..a8915ef3 100755 --- a/test/unit/chunks/nowiki_test.rb +++ b/test/unit/chunks/nowiki_test.rb @@ -18,9 +18,9 @@ class NoWikiTest < Test::Unit::TestCase ) end - def test_no_sanitize_nowiki + def test_sanitize_nowiki match(NoWiki, 'This sentence contains [[test]]&shebang *foo*. Do not touch!', - :plain_text => '[[test]]&shebang *foo*' + :plain_text => "[[test]]&shebang <script>alert(\"xss!\");</script> *foo*" ) end diff --git a/test/unit/page_renderer_test.rb b/test/unit/page_renderer_test.rb index 5a51540f..041c0fae 100644 --- a/test/unit/page_renderer_test.rb +++ b/test/unit/page_renderer_test.rb @@ -85,8 +85,8 @@ class PageRendererTest < Test::Unit::TestCase %{xmlns='http://www.w3.org/1998/Math/MathML'>sin} + %{(x)} + %{
} + - %{\\sin(x) \\begin{svg}\\end{svg}
}, - "$$\\sin(x) \\begin{svg}\\end{svg}$$") + %{\\sin(x) \\begin{svg}\\end{svg}}, + "$$\\sin(x) \\begin{svg}\\end{svg}$$") code_block = [ 'This is a code block:', @@ -264,7 +264,7 @@ class PageRendererTest < Test::Unit::TestCase # currently, upper case HTML elements are not allowed assert_markup_parsed_as( - "

This <IMG SRC=\"http://hobix.com/sample.jpg\" alt=\"\"/> is an inline image link.

", + "

This <IMG SRC='http://hobix.com/sample.jpg' alt=''/> is an inline image link.

", 'This is an inline image link.') end diff --git a/test/unit/sanitizer_test.rb b/test/unit/sanitizer_test.rb index b8c8c83f..f35260c8 100644 --- a/test/unit/sanitizer_test.rb +++ b/test/unit/sanitizer_test.rb @@ -14,7 +14,7 @@ class SanitizerTest < Test::Unit::TestCase end def do_sanitize_xhtml stream - sanitize_xhtml(stream.to_utf8) + xhtml_sanitize(stream) end def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)