diff --git a/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb b/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb
index 9168ba4d..3df5c0de 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb
@@ -4,6 +4,17 @@ module HTML5lib
# This module provides sanitization of XHTML+MathML+SVG
# and of inline style attributes.
+#
+# It can be applied either at the Tokenizer stage:
+#
+# HTMLParser.parse(html, :tokenizer => HTMLSanitizer)
+#
+# or, if you already have a parse tree (in this example, a REXML tree),
+# at the Serializer stage:
+#
+# tokens = TreeWalkers.getTreeWalker('rexml').new(tree)
+# HTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
+# :sanitize => true})
module HTMLSanitizeModule
diff --git a/vendor/plugins/HTML5lib/tests/test_sanitizer.rb b/vendor/plugins/HTML5lib/tests/test_sanitizer.rb
index b8d6fc57..24a5e232 100644
--- a/vendor/plugins/HTML5lib/tests/test_sanitizer.rb
+++ b/vendor/plugins/HTML5lib/tests/test_sanitizer.rb
@@ -19,6 +19,19 @@ class SanitizeTest < Test::Unit::TestCase
HTMLParser.parseFragment(stream, :tokenizer => HTMLSanitizer).join('').gsub(/'/,'"')
end
+ def sanitize_rexml stream
+ require 'rexml/document'
+ doc = REXML::Document.new("
(.*)<\/div>$/, '\1')
+ end
+
HTMLSanitizer::ALLOWED_ELEMENTS.each do |tag_name|
next if %w[caption col colgroup optgroup option table tbody td tfoot th thead tr].include?(tag_name) ### TODO
define_method "test_should_allow_#{tag_name}_tag" do
@@ -33,6 +46,8 @@ class SanitizeTest < Test::Unit::TestCase
sanitize_html("<#{tag_name} title='1'>foo bar baz#{tag_name}>")
assert_equal "<#{tag_name} title=\"1\">foo <bad>bar</bad> baz#{tag_name}>",
sanitize_xhtml("<#{tag_name} title='1'>foo bar baz#{tag_name}>")
+ assert_equal "<#{tag_name} title=\"1\">foo <bad>bar</bad> baz#{tag_name}>",
+ sanitize_rexml("<#{tag_name} title='1'>foo bar baz#{tag_name}>")
end
end
end
@@ -41,6 +56,8 @@ class SanitizeTest < Test::Unit::TestCase
define_method "test_should_forbid_#{tag_name.upcase}_tag" do
assert_equal "<#{tag_name.upcase} title=\"1\">foo <bad>bar</bad> baz</#{tag_name.upcase}>",
sanitize_html("<#{tag_name.upcase} title='1'>foo bar baz#{tag_name.upcase}>")
+ assert_equal "<#{tag_name.upcase} title=\"1\">foo <bad>bar</bad> baz</#{tag_name.upcase}>",
+ sanitize_rexml("<#{tag_name.upcase} title='1'>foo bar baz#{tag_name.upcase}>")
end
end
@@ -51,6 +68,8 @@ class SanitizeTest < Test::Unit::TestCase
sanitize_html("
foo bar baz
")
assert_equal "
foo <bad>bar</bad> baz
",
sanitize_xhtml("
foo bar baz
")
+ assert_equal "
foo <bad>bar</bad> baz
",
+ sanitize_rexml("
foo bar baz
")
end
end
@@ -58,6 +77,8 @@ class SanitizeTest < Test::Unit::TestCase
define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
assert_equal "
foo <bad>bar</bad> baz
",
sanitize_html("
foo bar baz
")
+ assert_equal "
foo <bad>bar</bad> baz
",
+ sanitize_rexml("
foo bar baz
")
end
end
@@ -65,6 +86,8 @@ class SanitizeTest < Test::Unit::TestCase
define_method "test_should_allow_#{protocol}_uris" do
assert_equal "foo",
sanitize_html(%(foo))
+ assert_equal "foo",
+ sanitize_rexml(%(foo))
end
end
@@ -72,44 +95,57 @@ class SanitizeTest < Test::Unit::TestCase
define_method "test_should_allow_uppercase_#{protocol}_uris" do
assert_equal "foo",
sanitize_html(%(foo))
+ assert_equal "foo",
+ sanitize_rexml(%(foo))
end
end
def test_should_allow_anchors
assert_equal "<script>baz</script>",
sanitize_html("")
+ assert_equal "<script>baz</script>",
+ sanitize_rexml("")
end
# RFC 3986, sec 4.2
def test_allow_colons_in_path_component
assert_equal "foo",
sanitize_html("foo")
+ assert_equal "foo",
+ sanitize_rexml("foo")
end
%w(src width height alt).each do |img_attr|
define_method "test_should_allow_image_#{img_attr}_attribute" do
assert_equal "",
sanitize_html("")
+ assert_equal "",
+ sanitize_rexml("")
end
end
def test_should_handle_non_html
assert_equal 'abc', sanitize_html("abc")
+ assert_equal 'abc', sanitize_rexml("abc")
end
def test_should_handle_blank_text
assert_equal '', sanitize_html('')
+ assert_equal '', sanitize_rexml('')
end
[%w(img src), %w(a href)].each do |(tag, attr)|
close = VOID_ELEMENTS.include?(tag) ? "/>boo" : ">boo#{tag}>"
+ xclose = VOID_ELEMENTS.include?(tag) ? " />" : ">boo#{tag}>"
define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do
- assert_equal %(<#{tag} title="1"#{close}), sanitize_html(%(<#{tag} #{attr}="javascript:XSS" title="1">boo#{tag}>))
+ assert_equal %(<#{tag} title="1"#{close}), sanitize_html(%(<#{tag} #{attr}="javascript:XSS" title="1">boo#{tag}>))
+ assert_equal %(<#{tag} title="1"#{xclose}), sanitize_rexml(%(<#{tag} #{attr}="javascript:XSS" title="1">boo#{tag}>))
end
define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols_and_whitespace" do
assert_equal %(<#{tag} title="1"#{close}), sanitize_html(%(<#{tag} #{attr}=" javascript:XSS" title="1">boo#{tag}>))
+ assert_equal %(<#{tag} title="1"#{xclose}), sanitize_rexml(%(<#{tag} #{attr}=" javascript:XSS" title="1">boo#{tag}>))
end
end
@@ -157,21 +193,28 @@ class SanitizeTest < Test::Unit::TestCase
def test_should_not_fall_for_ridiculous_hack
img_hack = %()
assert_equal "", sanitize_html(img_hack)
+ assert_equal "", sanitize_rexml(img_hack)
end
def test_platypus
assert_equal %(never trust your upstream platypus),
sanitize_html(%(never trust your upstream platypus))
+ assert_equal %(never trust your upstream platypus),
+ sanitize_rexml(%(never trust your upstream platypus))
end
def test_xul
assert_equal %(