instiki/lib/sanitize.rb

44 lines
1.2 KiB
Ruby
Raw Normal View History

module Sanitize
  # This module provides sanitization of XHTML+MathML+SVG
  # and of inline style attributes.
  #
  # Uses the HTML5lib parser, so that the parsing behaviour should
  # resemble that of browsers.
  #
  #   sanitize_xhtml() is a case-sensitive sanitizer, suitable for XHTML
  #   sanitize_html()  is a case-insensitive sanitizer, suitable for HTML
  #   sanitize_rexml() sanitizes a REXML tree, returning a string

  require 'html5lib/html5parser'
  require 'html5lib/liberalxmlparser'
  require 'html5lib/treewalkers'
  require 'html5lib/serializer'
  require 'string_utils'
  require 'html5lib/sanitizer'

  include HTML5lib

  # Sanitizes an XHTML fragment and returns the result as a String.
  #
  # The input is first passed through +to_ncr+ and then parsed as a
  # fragment by XHTMLParser, with HTMLSanitizer plugged in as the tokenizer.
  # NOTE(review): +to_ncr+ is not defined here; presumably it comes from
  # string_utils and rewrites named entities as numeric character
  # references -- confirm there.
  #
  # html:: the markup to sanitize; must respond to +to_ncr+.
  def sanitize_xhtml(html)
    XHTMLParser.parseFragment(html.to_ncr, :tokenizer => HTMLSanitizer).to_s
  end

  # Sanitizes an HTML fragment and returns the result as a String.
  #
  # Unlike sanitize_xhtml, the input is handed to HTMLParser unchanged
  # (no +to_ncr+ pass); HTMLSanitizer is again used as the tokenizer.
  #
  # html:: the markup to sanitize.
  def sanitize_html(html)
    HTMLParser.parseFragment(html, :tokenizer => HTMLSanitizer).to_s
  end

  # Sanitizes a REXML tree and returns it serialized as a String.
  #
  # The tree (after +to_ncr+) is walked with the 'rexml' tree walker and
  # the resulting token stream is serialized by HTMLSerializer with
  # sanitization switched on.
  #
  # tree:: the REXML tree to sanitize; must respond to +to_ncr+.
  def sanitize_rexml(tree)
    tokens = TreeWalkers.getTreeWalker('rexml').new(tree.to_ncr)

    # The flags must be real booleans. In Ruby every String is truthy, so
    # the previous 'false' values could not actually disable anything that
    # the serializer tests for truthiness.
    serializer_options = {
      :encoding                      => 'utf-8',
      :quote_attr_values             => true,
      :minimize_boolean_attributes   => false,
      :use_trailing_solidus          => true,
      :space_before_trailing_solidus => true,
      :omit_optional_tags            => false,
      :inject_meta_charset           => false,
      :sanitize                      => true
    }

    HTMLSerializer.serialize(tokens, serializer_options)
  end
end