Make Sanitizer::safe_xhtml_sanitize use Nokogiri

Also, update Bundler to 1.0.18.
This commit is contained in:
Jacques Distler 2011-08-19 19:32:53 -05:00
parent 4b2448b09a
commit 1e352e28a1
163 changed files with 1216 additions and 255 deletions

View file

@ -72,8 +72,6 @@ module Engines
my_content.to_s5
else
(t = Time.now; nil)
puts "text is #{text.class}"
puts "text responds to concat_with_safety" if text.respond_to?(:concat_with_safety)
html = Maruku.new(text,
{:math_enabled => true,
:math_numbered => ['\\[','\\begin{equation}']}).to_html

View file

@ -9,6 +9,7 @@ module Sanitizer
require 'node'
require 'instiki_stringsupport'
require 'set'
require 'nokogiri'
acceptable_elements = Set.new %w[a abbr acronym address area article aside
audio b big blockquote br button canvas caption center cite code
@ -227,9 +228,9 @@ module Sanitizer
# (REXML trees are always utf-8 encoded.)
# Sanitizes +html+ and checks that the result parses as XML.
#
# The input is passed through +html.purify+ and +xhtml_sanitize+, wrapped in a
# <div> carrying the XHTML namespace, and handed to an XML parser. When parsing
# raises, the sanitized text is returned HTML-escaped instead.
#
# html    - the markup to sanitize; must respond to +purify+.
# options - Hash read for:
#           :encoding - encoding passed to the Nokogiri parser (default 'UTF-8')
#           :indent   - indent passed to +to_xml+ (default 2)
#
# Evaluates to whichever +sanitized+ assignment ran last.
#
# NOTE(review): this listing contains both a REXML parse and a Nokogiri parse.
# Presumably these are the before/after lines of a change shown together -
# confirm against the real file. Read literally, the Nokogiri lines form the
# body of the `rescue REXML::ParseException` clause, and an exception raised
# inside one rescue clause is not handled by a sibling rescue clause.
def safe_xhtml_sanitize(html, options = {})
sanitized = xhtml_sanitize(html.purify)
# Wrap in a namespaced <div> so the fragment has a single root element.
doc = REXML::Document.new("<div xmlns='http://www.w3.org/1999/xhtml'>#{sanitized}</div>")
# Serialize, then strip the wrapper <div> again, keeping only its contents.
sanitized = doc.to_s.gsub(/\A<div xmlns='http:\/\/www.w3.org\/1999\/xhtml'>(.*)<\/div>\Z/m, '\1')
rescue REXML::ParseException
# NOTE(review): the trailing 0 is the parse-options argument; presumably it
# selects Nokogiri's strict (non-recovering) mode so malformed input raises - confirm.
doc = Nokogiri::XML::Document.parse("<div xmlns='http://www.w3.org/1999/xhtml'>#{sanitized}</div>", nil, (options[:encoding] || 'UTF-8'), 0)
# Serialize only the children of the wrapper <div>.
# NOTE(review): :save_with => 2 presumably means "no XML declaration" - confirm
# against Nokogiri's SaveOptions.
sanitized = doc.root.children.to_xml(:indent => (options[:indent] || 2), :save_with => 2 )
rescue Nokogiri::XML::SyntaxError
# Fallback: return the sanitized text HTML-escaped.
sanitized = sanitized.escapeHTML
end