Make Sanitizer::safe_xhtml_sanitize use Nokogiri

Also, update Bundler to 1.0.18.
This commit is contained in:
Jacques Distler 2011-08-19 19:32:53 -05:00
parent 4b2448b09a
commit 1e352e28a1
163 changed files with 1216 additions and 255 deletions

View file

@ -9,6 +9,7 @@ module Sanitizer
require 'node'
require 'instiki_stringsupport'
require 'set'
require 'nokogiri'
acceptable_elements = Set.new %w[a abbr acronym address area article aside
audio b big blockquote br button canvas caption center cite code
@ -227,9 +228,9 @@ module Sanitizer
# (REXML trees are always utf-8 encoded.)
# Runs +html+ through +purify+ and +xhtml_sanitize+, then parses the result
# (wrapped in an XHTML-namespaced <div>) to check that it is well-formed.
# When parsing raises, the sanitized text is replaced by +sanitized.escapeHTML+.
#
# html    - the markup to sanitize; must respond to +purify+.
# options - Hash; the Nokogiri lines read +:encoding+ (default 'UTF-8') and
#           +:indent+ (default 2).
#
# Returns the value last assigned to +sanitized+.
#
# NOTE(review): this listing looks like a diff whose +/- markers were
# stripped, so the REXML statements (apparently the pre-change version) and
# the Nokogiri statements (apparently their replacement) both appear in one
# body. Read literally, the Nokogiri code would only run inside the
# REXML::ParseException handler, and the Nokogiri::XML::SyntaxError clause
# would not cover errors raised there -- confirm against the repository
# before relying on the control flow shown here.
def safe_xhtml_sanitize(html, options = {})
sanitized = xhtml_sanitize(html.purify)
# REXML variant: parse the wrapped fragment, then strip the wrapper <div>
# back off the serialized document with a regexp.
doc = REXML::Document.new("<div xmlns='http://www.w3.org/1999/xhtml'>#{sanitized}</div>")
sanitized = doc.to_s.gsub(/\A<div xmlns='http:\/\/www.w3.org\/1999\/xhtml'>(.*)<\/div>\Z/m, '\1')
rescue REXML::ParseException
# Nokogiri variant: the trailing 0 is the parse-options argument; presumably
# strict parsing with no error recovery, so malformed input raises -- TODO confirm.
doc = Nokogiri::XML::Document.parse("<div xmlns='http://www.w3.org/1999/xhtml'>#{sanitized}</div>", nil, (options[:encoding] || 'UTF-8'), 0)
# Serialize only the children of the wrapper <div>. :save_with => 2 is
# presumably Nokogiri's SaveOptions::NO_DECLARATION -- TODO confirm.
sanitized = doc.root.children.to_xml(:indent => (options[:indent] || 2), :save_with => 2 )
rescue Nokogiri::XML::SyntaxError
# Not well-formed: fall back to the escaped form of the sanitized text.
sanitized = sanitized.escapeHTML
end