diff --git a/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb b/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb index 9168ba4d..3df5c0de 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb +++ b/vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb @@ -4,6 +4,17 @@ module HTML5lib # This module provides sanitization of XHTML+MathML+SVG # and of inline style attributes. +# +# It can be either at the Tokenizer stage: +# +# HTMLParser.parse(html, :tokenizer => HTMLSanitizer) +# +# or, if you already have a parse tree (in this example, a REXML tree), +# at the Serializer stage: +# +# tokens = TreeWalkers.getTreeWalker('rexml').new(tree) +# HTMLSerializer.serialize(tokens, {:encoding=>'utf-8', +# :sanitize => true}) module HTMLSanitizeModule diff --git a/vendor/plugins/HTML5lib/tests/test_sanitizer.rb b/vendor/plugins/HTML5lib/tests/test_sanitizer.rb index b8d6fc57..24a5e232 100644 --- a/vendor/plugins/HTML5lib/tests/test_sanitizer.rb +++ b/vendor/plugins/HTML5lib/tests/test_sanitizer.rb @@ -19,6 +19,19 @@ class SanitizeTest < Test::Unit::TestCase HTMLParser.parseFragment(stream, :tokenizer => HTMLSanitizer).join('').gsub(/'/,'"') end + def sanitize_rexml stream + require 'rexml/document' + doc = REXML::Document.new("
#{stream}
") + tokens = TreeWalkers.getTreeWalker('rexml').new(doc) + HTMLSerializer.serialize(tokens, {:encoding=>'utf-8', + :quote_attr_values => true, + :minimize_boolean_attributes => false, + :use_trailing_solidus => true, + :omit_optional_tags => false, + :inject_meta_charset => false, + :sanitize => true}).gsub(/^
(.*)<\/div>$/, '\1') + end + HTMLSanitizer::ALLOWED_ELEMENTS.each do |tag_name| next if %w[caption col colgroup optgroup option table tbody td tfoot th thead tr].include?(tag_name) ### TODO define_method "test_should_allow_#{tag_name}_tag" do @@ -33,6 +46,8 @@ class SanitizeTest < Test::Unit::TestCase sanitize_html("<#{tag_name} title='1'>foo bar baz") assert_equal "<#{tag_name} title=\"1\">foo <bad>bar</bad> baz", sanitize_xhtml("<#{tag_name} title='1'>foo bar baz") + assert_equal "<#{tag_name} title=\"1\">foo <bad>bar</bad> baz", + sanitize_rexml("<#{tag_name} title='1'>foo bar baz") end end end @@ -41,6 +56,8 @@ class SanitizeTest < Test::Unit::TestCase define_method "test_should_forbid_#{tag_name.upcase}_tag" do assert_equal "<#{tag_name.upcase} title=\"1\">foo <bad>bar</bad> baz</#{tag_name.upcase}>", sanitize_html("<#{tag_name.upcase} title='1'>foo bar baz") + assert_equal "<#{tag_name.upcase} title=\"1\">foo <bad>bar</bad> baz</#{tag_name.upcase}>", + sanitize_rexml("<#{tag_name.upcase} title='1'>foo bar baz") end end @@ -51,6 +68,8 @@ class SanitizeTest < Test::Unit::TestCase sanitize_html("

foo bar baz

") assert_equal "

foo <bad>bar</bad> baz

", sanitize_xhtml("

foo bar baz

") + assert_equal "

foo <bad>bar</bad> baz

", + sanitize_rexml("

foo bar baz

") end end @@ -58,6 +77,8 @@ class SanitizeTest < Test::Unit::TestCase define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do assert_equal "

foo <bad>bar</bad> baz

", sanitize_html("

foo bar baz

") + assert_equal "

foo <bad>bar</bad> baz

", + sanitize_rexml("

foo bar baz

") end end @@ -65,6 +86,8 @@ class SanitizeTest < Test::Unit::TestCase define_method "test_should_allow_#{protocol}_uris" do assert_equal "foo", sanitize_html(%(foo)) + assert_equal "foo", + sanitize_rexml(%(foo)) end end @@ -72,44 +95,57 @@ class SanitizeTest < Test::Unit::TestCase define_method "test_should_allow_uppercase_#{protocol}_uris" do assert_equal "foo", sanitize_html(%(foo)) + assert_equal "foo", + sanitize_rexml(%(foo)) end end def test_should_allow_anchors assert_equal "<script>baz</script>", sanitize_html("") + assert_equal "<script>baz</script>", + sanitize_rexml("") end # RFC 3986, sec 4.2 def test_allow_colons_in_path_component assert_equal "foo", sanitize_html("foo") + assert_equal "foo", + sanitize_rexml("foo") end %w(src width height alt).each do |img_attr| define_method "test_should_allow_image_#{img_attr}_attribute" do assert_equal "", sanitize_html("") + assert_equal "", + sanitize_rexml("") end end def test_should_handle_non_html assert_equal 'abc', sanitize_html("abc") + assert_equal 'abc', sanitize_rexml("abc") end def test_should_handle_blank_text assert_equal '', sanitize_html('') + assert_equal '', sanitize_rexml('') end [%w(img src), %w(a href)].each do |(tag, attr)| close = VOID_ELEMENTS.include?(tag) ? "/>boo" : ">boo" + xclose = VOID_ELEMENTS.include?(tag) ? " />" : ">boo" define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do - assert_equal %(<#{tag} title="1"#{close}), sanitize_html(%(<#{tag} #{attr}="javascript:XSS" title="1">boo)) + assert_equal %(<#{tag} title="1"#{close}), sanitize_html(%(<#{tag} #{attr}="javascript:XSS" title="1">boo)) + assert_equal %(<#{tag} title="1"#{xclose}), sanitize_rexml(%(<#{tag} #{attr}="javascript:XSS" title="1">boo)) end define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols_and_whitespace" do assert_equal %(<#{tag} title="1"#{close}), sanitize_html(%(<#{tag} #{attr}=" javascript:XSS" title="1">boo)) + assert_equal %(<#{tag} title="1"#{xclose}), sanitize_rexml(%(<#{tag} #{attr}=" javascript:XSS" title="1">boo)) end end @@ -157,21 +193,28 @@ class SanitizeTest < Test::Unit::TestCase def test_should_not_fall_for_ridiculous_hack img_hack = %() assert_equal "", sanitize_html(img_hack) + assert_equal "", sanitize_rexml(img_hack) end def test_platypus assert_equal %(never trust your upstream platypus), sanitize_html(%(never trust your upstream platypus)) + assert_equal %(never trust your upstream platypus), + sanitize_rexml(%(never trust your upstream platypus)) end def test_xul assert_equal %(

fubar

), sanitize_html(%(

fubar

)) + assert_equal %(

fubar

), + sanitize_rexml(%(

fubar

)) end def test_input_image assert_equal %(), sanitize_html(%()) + assert_equal %(), + sanitize_rexml(%()) end def test_non_alpha_non_digit @@ -186,27 +229,35 @@ class SanitizeTest < Test::Unit::TestCase def test_img_dynsrc_lowsrc assert_equal "", sanitize_html(%()) - assert_equal "", - sanitize_html(%()) + assert_equal "", + sanitize_rexml(%()) end def test_div_background_image_unicode_encoded assert_equal '
foo
', sanitize_html(%(
foo
)) + assert_equal '
foo
', + sanitize_rexml(%(
foo
)) end def test_div_expression assert_equal '
foo
', sanitize_html(%(
foo
)) + assert_equal '
foo
', + sanitize_rexml(%(
foo
)) end def test_img_vbscript assert_equal '', sanitize_html(%()) + assert_equal '', + sanitize_rexml(%()) end def test_should_handle_astral_plane_characters assert_equal "

\360\235\222\265 \360\235\224\270

", sanitize_html("

𝒵 𝔸

") + assert_equal "

\360\235\222\265 \360\235\224\270

", + sanitize_rexml("

𝒵 𝔸

") end end diff --git a/vendor/plugins/maruku/lib/maruku/output/to_latex.rb b/vendor/plugins/maruku/lib/maruku/output/to_latex.rb index d13dc2dc..d2b9e741 100644 --- a/vendor/plugins/maruku/lib/maruku/output/to_latex.rb +++ b/vendor/plugins/maruku/lib/maruku/output/to_latex.rb @@ -365,7 +365,7 @@ Otherwise, a standard `verbatim` environment is used. color = get_setting(:code_background_color) colorspec = latex_color(color, 'colorbox') - "#{colorspec}{\\tt #{s}}" + "{#{colorspec}{\\tt #{s}}}" end def to_latex_immediate_link