REXML Trees
Synced with latest HTML5lib. Added preliminary support (currently disabled) for sanitizing REXML trees.
This commit is contained in:
parent
4dd70af5ae
commit
bd8ba1f4b1
28 changed files with 1317 additions and 112 deletions
|
@ -8,19 +8,36 @@ module Sanitize
|
|||
#
|
||||
# sanitize_xhtml() is a case-sensitive sanitizer, suitable for XHTML
|
||||
# sanitize_html() is a case-insensitive sanitizer suitable for HTML
|
||||
# sanitize_rexml() sanitizes a REXML tree, returning a string
|
||||
|
||||
|
||||
require 'html5lib/sanitizer'
|
||||
require 'html5lib/html5parser'
|
||||
require 'html5lib/liberalxmlparser'
|
||||
|
||||
require 'html5lib/treewalkers'
|
||||
require 'html5lib/serializer'
|
||||
require 'string_utils'
|
||||
require 'html5lib/sanitizer'
|
||||
|
||||
include HTML5lib
|
||||
|
||||
def sanitize_xhtml(html)
|
||||
XHTMLParser.parseFragment(html, :tokenizer => HTMLSanitizer).to_s
|
||||
XHTMLParser.parseFragment(html.to_ncr, :tokenizer => HTMLSanitizer).to_s
|
||||
end
|
||||
|
||||
def sanitize_html(html)
|
||||
HTMLParser.parseFragment(html, :tokenizer => HTMLSanitizer).to_s
|
||||
end
|
||||
|
||||
def sanitize_rexml(tree)
|
||||
tokens = TreeWalkers.getTreeWalker('rexml').new(tree.to_ncr)
|
||||
HTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
|
||||
:quote_attr_values => 'true',
|
||||
:minimize_boolean_attributes => 'false',
|
||||
:use_trailing_solidus => 'true',
|
||||
:space_before_trailing_solidus => 'true',
|
||||
:omit_optional_tags => 'false',
|
||||
:inject_meta_charset => 'false',
|
||||
:sanitize => 'true'})
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue