Put the "safe" XHTML sanitization in lib/sanitize.rb, rather than in lib/chunks/nowiki.rb.
D'oh!
This commit is contained in:
Jacques Distler 2008-12-01 10:29:46 -06:00
parent 758325923f
commit 513b2b16c1
3 changed files with 23 additions and 11 deletions

View file

@ -1,6 +1,5 @@
require 'chunks/chunk'
require 'sanitize'
require 'rexml/document'
# This chunk allows certain parts of a wiki page to be hidden from the
# rest of the rendering pipeline. It should be run at the beginning
@ -27,15 +26,7 @@ class NoWiki < Chunk::Abstract
# Builds the chunk from the matched <nowiki> content, storing a sanitized copy
# of the captured text (match_data[1]) in @plain_text and @unmask_text.
#
# NOTE(review): this span looks like a diff hunk whose +/- markers were lost:
# the begin/rescue section and the first @plain_text assignment appear to be
# the removed implementation, and the final assignment its replacement.
# Read literally, the final assignment overwrites the earlier result.
def initialize(match_data, content)
super
begin
# Sanitize the captured text, then reparse it inside a namespaced <div> so
# REXML raises if the sanitized markup is not well-formed.
sanitized = sanitize_xhtml(match_data[1])
doc = REXML::Document.new("<div xmlns='http://www.w3.org/1999/xhtml'>#{sanitized}</div>")
# Strip the wrapper <div> again, keeping only its contents.
sanitized = doc.to_s.gsub(/\A<div xmlns='http:\/\/www.w3.org\/1999\/xhtml'>(.*)<\/div>\Z/m, '\1')
rescue REXML::ParseException
# On a parse failure, substitute a highlighted <pre> error block that
# contains the escaped sanitized text.
sanitized = %{<pre class='markdown-html-error' style='border: solid 3px red; background-color: pink;'>HTML parse error:
#{sanitized.escapeHTML}</pre>}
end
@plain_text = @unmask_text = sanitized
# Both attributes are (re)assigned from safe_sanitize_xhtml, which replaces
# whatever the block above produced.
@plain_text = @unmask_text = safe_sanitize_xhtml(match_data[1])
end
end

View file

@ -9,6 +9,8 @@
# sanitize_xhtml() is a case-sensitive sanitizer, suitable for XHTML
# sanitize_html() is a case-insensitive sanitizer suitable for HTML
# sanitize_rexml() sanitizes a REXML tree, returning a string
# safe_sanitize_xhtml() makes extra-sure that the result is well-formed XHTML
# by running the output of sanitize_xhtml() through REXML
#
# == Files
#
@ -69,6 +71,25 @@ module Sanitize
return parsed if @to_tree
return parsed.to_s
end
# Sanitize a string, parsed using XHTML parsing rules. Reparse the result to
# ensure well-formedness.
#
# :call-seq:
# safe_sanitize_xhtml(string) -> string
#
# Unless otherwise specified, the string is assumed to be utf-8 encoded.
#
# The string returned is utf-8 encoded. If you want, you can use iconv to convert it to some other encoding.
# (REXML trees are always utf-8 encoded.)
#
# The +options+ hash is passed on to sanitize_xhtml with :to_tree forced to
# false, because the result is interpolated into a string below. The caller's
# hash is not modified.
#
# If REXML cannot reparse the sanitized markup, the sanitized string is
# returned with its markup escaped (via escapeHTML) instead of raising.
def safe_sanitize_xhtml(html, options = {})
  # merge rather than assign into +options+, so the caller's hash is untouched
  # (and a frozen hash is accepted).
  sanitized = sanitize_xhtml(html, options.merge(:to_tree => false))
  # Wrap in a namespaced <div> so REXML sees exactly one root element.
  doc = REXML::Document.new("<div xmlns='http://www.w3.org/1999/xhtml'>#{sanitized}</div>")
  # Remove the wrapper again; the pattern is anchored, so one substitution suffices.
  doc.to_s.sub(%r{\A<div xmlns='http://www\.w3\.org/1999/xhtml'>(.*)</div>\Z}m, '\1')
rescue REXML::ParseException
  # Not well-formed: hand back the sanitized text as escaped character data.
  sanitized.escapeHTML
end
# Sanitize a string, parsed using HTML parsing rules.
#

View file

@ -26,7 +26,7 @@ class NoWikiTest < Test::Unit::TestCase
# Checks what NoWiki stores as :plain_text when the <nowiki> content is not
# well-formed after sanitization.
#
# NOTE(review): the two consecutive :plain_text lines look like the removed
# and added sides of a diff whose +/- markers were lost; as written they are
# not separated by a comma. The first expects the <pre> "HTML parse error"
# wrapper, the second expects only the escaped markup -- presumably the
# second is the current expectation; confirm against the repository.
def test_sanitize_nowiki_ill_formed
match(NoWiki, "<nowiki><animateColor xlink:href='#foo'/></nowiki>",
:plain_text => "<pre class='markdown-html-error' style='border: solid 3px red; background-color: pink;'>HTML parse error:\n&lt;animateColor xlink:href=&#39;#foo&#39;&gt;&lt;/animateColor&gt;</pre>"
:plain_text => "&lt;animateColor xlink:href=&#39;#foo&#39;&gt;&lt;/animateColor&gt;"
)
end