HTML5lib Sanitizer

Replaced native Sanitizer with HTML5lib version.
Synced with latest Maruku.
Jacques Distler 2007-05-25 20:52:27 -05:00
parent 457ec8627c
commit 6b21ac484f
36 changed files with 6534 additions and 215 deletions
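The new sanitizer is not a standalone filter; it plugs into the HTML5lib parser as a tokenizer. A minimal sketch of the wiring, following the pattern used by the sanitizer tests added below (the sample fragment is illustrative):

require 'html5lib/html5parser'
require 'html5lib/sanitizer'

# Parse an untrusted fragment with the sanitizing tokenizer and join the
# resulting pieces back into a string.
fragment = HTML5lib::HTMLParser.parseFragment(
  %(<p onclick="alert(1)">hi</p>),
  :tokenizer => HTML5lib::HTMLSanitizer)
puts fragment.join('')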

@@ -0,0 +1,11 @@
require 'test/unit'
HTML5LIB_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__))))
$:.unshift File.join(File.dirname(File.dirname(__FILE__)),'lib')
$:.unshift File.dirname(__FILE__)
def html5lib_test_files(subdirectory)
Dir[File.join(HTML5LIB_BASE, 'tests', subdirectory, '*.*')]
end

@@ -0,0 +1,36 @@
require File.join(File.dirname(__FILE__), 'preamble')
require 'html5lib/inputstream'
class Html5EncodingTestCase < Test::Unit::TestCase
begin
require 'rubygems'
require 'UniversalDetector'
def test_chardet
File.open(File.join(HTML5LIB_BASE, 'tests', 'encoding', 'chardet', 'test_big5.txt')) do |file|
stream = HTML5lib::HTMLInputStream.new(file, :chardet => true)
assert_equal 'big5', stream.charEncoding.downcase
end
end
rescue LoadError
puts "chardet not found, skipping chardet tests"
end
html5lib_test_files('encoding').each do |test_file|
test_name = File.basename(test_file).sub('.dat', '').tr('-', '')
File.read(test_file).split("#data\n").each_with_index do |data, index|
next if data.empty?
input, encoding = data.split(/\n#encoding\s+/, 2)
encoding = encoding.split[0]
define_method 'test_%s_%d' % [ test_name, index + 1 ] do
stream = HTML5lib::HTMLInputStream.new(input, :chardet => false)
assert_equal encoding.downcase, stream.charEncoding.downcase, input
end
end
end
end
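
The loader above slices each fixture file on its #data and #encoding markers, so a single case in tests/encoding/*.dat has roughly this shape (an illustrative sketch reconstructed from the splitting code, not copied from an upstream fixture):

#data
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
<p>Some text</p>
#encoding
iso-8859-1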

vendor/plugins/HTML5lib/tests/test_lxp.rb (vendored executable file, 212 lines)

@@ -0,0 +1,212 @@
require File.join(File.dirname(__FILE__), 'preamble')
require 'html5lib/liberalxmlparser'
XMLELEM = /<(\w+\s*)((?:[-:\w]+="[^"]*"\s*)+)(\/?)>/
# Rewrite each element with its attributes sorted, so attribute order
# does not affect the comparison.
def sortattrs(xml)
xml.gsub(XMLELEM) { "<#{$1 + $2.split.sort.join(' ') + $3}>" }
end
def assert_xml_equal(input, expected=nil, parser=HTML5lib::XMLParser)
document = parser.parse(input.chomp).root
if not expected
expected = sortattrs(input.chomp)
expected = expected.gsub(/&#(\d+);/) {[$1.to_i].pack('U')}
output = sortattrs(document.to_s.gsub(/'/,'"'))
assert_equal(expected, output)
else
assert_equal(expected, document.to_s.gsub(/'/,'"'))
end
end
def assert_xhtml_equal(input, expected=nil, parser=HTML5lib::XHTMLParser)
assert_xml_equal(input, expected, parser)
end
class BasicXhtml5Test < Test::Unit::TestCase
def test_title_body_mismatched_close
assert_xhtml_equal(
'<title>Xhtml</title><b><i>content</b></i>',
'<html xmlns="http://www.w3.org/1999/xhtml">' +
'<head><title>Xhtml</title></head>' +
'<body><b><i>content</i></b></body>' +
'</html>')
end
def test_title_body_named_charref
assert_xhtml_equal(
'<title>mdash</title>A &mdash B',
'<html xmlns="http://www.w3.org/1999/xhtml">' +
'<head><title>mdash</title></head>' +
'<body>A '+ [0x2014].pack('U') + ' B</body>' +
'</html>')
end
end
class BasicXmlTest < Test::Unit::TestCase
def test_comment
assert_xml_equal("<x><!-- foo --></x>")
end
def test_cdata
assert_xml_equal("<x><![CDATA[foo]]></x>","<x>foo</x>")
end
def test_simple_text
assert_xml_equal("<p>foo</p>","<p>foo</p>")
end
def test_optional_close
assert_xml_equal("<p>foo","<p>foo</p>")
end
def test_html_mismatched
assert_xml_equal("<b><i>foo</b></i>","<b><i>foo</i></b>")
end
end
class OpmlTest < Test::Unit::TestCase
def test_mixedCaseElement
assert_xml_equal(
'<opml version="1.0">' +
'<head><ownerName>Dave Winer</ownerName></head>' +
'</opml>')
end
def test_mixedCaseAttribute
assert_xml_equal(
'<opml version="1.0">' +
'<body><outline isComment="true"/></body>' +
'</opml>')
end
def test_malformed
assert_xml_equal(
'<opml version="1.0">' +
'<body><outline text="Odds & Ends"/></body>' +
'</opml>',
'<opml version="1.0">' +
'<body><outline text="Odds &amp; Ends"/></body>' +
'</opml>')
end
end
class XhtmlTest < Test::Unit::TestCase
def test_mathml
assert_xhtml_equal <<EOX
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>MathML</title></head>
<body>
<math xmlns="http://www.w3.org/1998/Math/MathML">
<mrow>
<mi>x</mi>
<mo>=</mo>
<mfrac>
<mrow>
<mrow>
<mo>-</mo>
<mi>b</mi>
</mrow>
<mo>&#177;</mo>
<msqrt>
<mrow>
<msup>
<mi>b</mi>
<mn>2</mn>
</msup>
<mo>-</mo>
<mrow>
<mn>4</mn>
<mo>&#8290;</mo>
<mi>a</mi>
<mo>&#8290;</mo>
<mi>c</mi>
</mrow>
</mrow>
</msqrt>
</mrow>
<mrow>
<mn>2</mn>
<mo>&#8290;</mo>
<mi>a</mi>
</mrow>
</mfrac>
</mrow>
</math>
</body></html>
EOX
end
def test_svg
assert_xhtml_equal <<EOX
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>SVG</title></head>
<body>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
<path d="M38,38c0-12,24-15,23-2c0,9-16,13-16,23v7h11v-4c0-9,17-12,17-27
c-2-22-45-22-45,3zM45,70h11v11h-11z" fill="#371">
</path>
<circle cx="50" cy="50" r="45" fill="none" stroke="#371" stroke-width="10">
</circle>
</svg>
</body></html>
EOX
end
def test_xlink
assert_xhtml_equal <<EOX
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>XLINK</title></head>
<body>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
<defs xmlns:l="http://www.w3.org/1999/xlink">
<radialGradient id="s1" fx=".4" fy=".2" r=".7">
<stop stop-color="#FE8"/>
<stop stop-color="#D70" offset="1"/>
</radialGradient>
<radialGradient id="s2" fx=".8" fy=".5" l:href="#s1"/>
<radialGradient id="s3" fx=".5" fy=".9" l:href="#s1"/>
<radialGradient id="s4" fx=".1" fy=".5" l:href="#s1"/>
</defs>
<g stroke="#940">
<path d="M73,29c-37-40-62-24-52,4l6-7c-8-16,7-26,42,9z" fill="url(#s1)"/>
<path d="M47,8c33-16,48,21,9,47l-6-5c38-27,20-44,5-37z" fill="url(#s2)"/>
<path d="M77,32c22,30,10,57-39,51l-1-8c3,3,67,5,36-36z" fill="url(#s3)"/>
<path d="M58,84c-4,20-38-4-8-24l-6-5c-36,43,15,56,23,27z" fill="url(#s4)"/>
<path d="M40,14c-40,37-37,52-9,68l1-8c-16-13-29-21,16-56z" fill="url(#s1)"/>
<path d="M31,33c19,23,20,7,35,41l-9,1.7c-4-19-8-14-31-37z" fill="url(#s2)"/>
</g>
</svg>
</body></html>
EOX
end
def test_br
assert_xhtml_equal <<EOX
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>XLINK</title></head>
<body>
<br/>
</body></html>
EOX
end
def xtest_strong
assert_xhtml_equal <<EOX
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>XLINK</title></head>
<body>
<strong></strong>
</body></html>
EOX
end
end

@@ -0,0 +1,108 @@
require File.join(File.dirname(__FILE__), 'preamble')
require 'html5lib/treebuilders'
require 'html5lib/html5parser'
$tree_types_to_test = ['simpletree', 'rexml']
begin
require 'hpricot'
$tree_types_to_test.push('hpricot')
rescue LoadError
end
$CHECK_PARSER_ERRORS = false
puts 'Testing: ' + $tree_types_to_test * ', '
class Html5ParserTestCase < Test::Unit::TestCase
def self.startswith?(a, b)
b[0... a.length] == a
end
def self.parseTestcase(data)
innerHTML = nil
input = []
output = []
errors = []
currentList = input
data.split(/\n/).each do |line|
if !line.empty? and !startswith?("#errors", line) and
!startswith?("#document", line) and
!startswith?("#data", line) and
!startswith?("#document-fragment", line)
if currentList == output and startswith?("|", line)
currentList.push(line[2..-1])
else
currentList.push(line)
end
elsif line == "#errors"
currentList = errors
elsif line == "#document" or startswith?("#document-fragment", line)
if startswith?("#document-fragment", line)
innerHTML = line[19..-1]
raise 'missing fragment context after #document-fragment' unless innerHTML
end
currentList = output
end
end
return innerHTML, input.join("\n"), output.join("\n"), errors
end
# Convert the tree dump produced by testSerializer into the format used in the test cases
def convertTreeDump(treedump)
treedump.split(/\n/)[1..-1].map { |line| (line.length > 2 and line[0] == ?|) ? line[3..-1] : line }.join("\n")
end
def sortattrs(output)
output.gsub(/^(\s+)\w+=.*(\n\1\w+=.*)+/) { |match| match.split("\n").sort.join("\n") }
end
html5lib_test_files('tree-construction').each do |test_file|
test_name = File.basename(test_file).sub('.dat', '')
File.read(test_file).split("#data\n").each_with_index do |data, index|
next if data.empty?
innerHTML, input, expected_output, expected_errors = parseTestcase(data)
$tree_types_to_test.each do |tree_name|
define_method 'test_%s_%d_%s' % [ test_name, index + 1, tree_name ] do
parser = HTML5lib::HTMLParser.new(:tree => HTML5lib::TreeBuilders.getTreeBuilder(tree_name))
if innerHTML
parser.parseFragment(input, innerHTML)
else
parser.parse(input)
end
actual_output = convertTreeDump(parser.tree.testSerializer(parser.tree.document))
assert_equal sortattrs(expected_output), sortattrs(actual_output), [
'Input:', input,
'Expected:', expected_output,
'Received:', actual_output
].join("\n")
if $CHECK_PARSER_ERRORS
actual_errors = parser.errors.map do |(line, col), message|
'Line: %i Col: %i %s' % [line, col, message]
end
assert_equal expected_errors.length, parser.errors.length, [
'Expected errors:', expected_errors.join("\n"),
'Actual errors:', actual_errors.join("\n")
].join("\n")
end
end
end
end
end
end
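
parseTestcase above implies the layout of the tree-construction fixtures: a #data block with the raw input, an #errors block (only counted by these tests), and a #document dump whose lines carry a "| " prefix; fragment cases use #document-fragment instead. An illustrative case reconstructed from that parsing logic, not copied from an upstream fixture:

#data
<p>One<p>Two
#errors
expected-doctype-but-got-start-tag
#document
| <html>
|   <head>
|   <body>
|     <p>
|       "One"
|     <p>
|       "Two"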

@@ -0,0 +1,206 @@
#!/usr/bin/env ruby
require File.join(File.dirname(__FILE__), 'preamble')
require 'html5lib/sanitizer'
require 'html5lib/html5parser'
require 'html5lib/liberalxmlparser'
class SanitizeTest < Test::Unit::TestCase
include HTML5lib
def sanitize_xhtml stream
XHTMLParser.parseFragment(stream, :tokenizer => HTMLSanitizer).join('').gsub(/'/,'"')
end
def sanitize_html stream
HTMLParser.parseFragment(stream, :tokenizer => HTMLSanitizer).join('').gsub(/'/,'"')
end
HTMLSanitizer::ALLOWED_ELEMENTS.each do |tag_name|
next if %w[caption col colgroup optgroup option table tbody td tfoot th thead tr].include?(tag_name) ### TODO
define_method "test_should_allow_#{tag_name}_tag" do
if tag_name == 'image'
assert_equal "<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
sanitize_html("<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>")
elsif VOID_ELEMENTS.include?(tag_name)
assert_equal "<#{tag_name} title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
sanitize_html("<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>")
else
assert_equal "<#{tag_name.downcase} title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>",
sanitize_html("<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>")
assert_equal "<#{tag_name} title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name}>",
sanitize_xhtml("<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>")
end
end
end
HTMLSanitizer::ALLOWED_ELEMENTS.each do |tag_name|
define_method "test_should_forbid_#{tag_name.upcase}_tag" do
assert_equal "&lt;#{tag_name.upcase} title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/#{tag_name.upcase}&gt;",
sanitize_html("<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>")
end
end
HTMLSanitizer::ALLOWED_ATTRIBUTES.each do |attribute_name|
next if attribute_name == 'style'
define_method "test_should_allow_#{attribute_name}_attribute" do
assert_equal "<p #{attribute_name.downcase}=\"foo\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>",
sanitize_html("<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>")
assert_equal "<p #{attribute_name}=\"foo\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>",
sanitize_xhtml("<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>")
end
end
HTMLSanitizer::ALLOWED_ATTRIBUTES.each do |attribute_name|
define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
assert_equal "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>",
sanitize_html("<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>")
end
end
HTMLSanitizer::ALLOWED_PROTOCOLS.each do |protocol|
define_method "test_should_allow_#{protocol}_uris" do
assert_equal "<a href=\"#{protocol}\">foo</a>",
sanitize_html(%(<a href="#{protocol}">foo</a>))
end
end
HTMLSanitizer::ALLOWED_PROTOCOLS.each do |protocol|
define_method "test_should_allow_uppercase_#{protocol}_uris" do
assert_equal "<a href=\"#{protocol.upcase}\">foo</a>",
sanitize_html(%(<a href="#{protocol.upcase}">foo</a>))
end
end
def test_should_allow_anchors
assert_equal "<a href=\"foo\">&lt;script&gt;baz&lt;/script&gt;</a>",
sanitize_html("<a href='foo' onclick='bar'><script>baz</script></a>")
end
# RFC 3986, sec 4.2
def test_allow_colons_in_path_component
assert_equal "<a href=\"./this:that\">foo</a>",
sanitize_html("<a href=\"./this:that\">foo</a>")
end
%w(src width height alt).each do |img_attr|
define_method "test_should_allow_image_#{img_attr}_attribute" do
assert_equal "<img #{img_attr}=\"foo\"/>",
sanitize_html("<img #{img_attr}='foo' onclick='bar' />")
end
end
def test_should_handle_non_html
assert_equal 'abc', sanitize_html("abc")
end
def test_should_handle_blank_text
assert_equal '', sanitize_html('')
end
[%w(img src), %w(a href)].each do |(tag, attr)|
close = VOID_ELEMENTS.include?(tag) ? "/>boo" : ">boo</#{tag}>"
define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do
assert_equal %(<#{tag} title="1"#{close}), sanitize_html(%(<#{tag} #{attr}="javascript:XSS" title="1">boo</#{tag}>))
end
define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols_and_whitespace" do
assert_equal %(<#{tag} title="1"#{close}), sanitize_html(%(<#{tag} #{attr}=" javascript:XSS" title="1">boo</#{tag}>))
end
end
[%(<img src="javascript:alert('XSS');" />),
%(<img src=javascript:alert('XSS') />),
%(<img src="JaVaScRiPt:alert('XSS')" />),
%(<img src='javascript:alert(&quot;XSS&quot;)' />),
%(<img src='javascript:alert(String.fromCharCode(88,83,83))' />),
%(<img src='&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;' />),
%(<img src='&#0000106;&#0000097;&#0000118;&#0000097;&#0000115;&#0000099;&#0000114;&#0000105;&#0000112;&#0000116;&#0000058;&#0000097;&#0000108;&#0000101;&#0000114;&#0000116;&#0000040;&#0000039;&#0000088;&#0000083;&#0000083;&#0000039;&#0000041' />),
%(<img src='&#x6A;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3A;&#x61;&#x6C;&#x65;&#x72;&#x74;&#x28;&#x27;&#x58;&#x53;&#x53;&#x27;&#x29' />),
%(<img src="jav\tascript:alert('XSS');" />),
%(<img src="jav&#x09;ascript:alert('XSS');" />),
%(<img src="jav&#x0A;ascript:alert('XSS');" />),
%(<img src="jav&#x0D;ascript:alert('XSS');" />),
%(<img src=" &#14; javascript:alert('XSS');" />),
%(<img src="&#x20;javascript:alert('XSS');" />),
%(<img src="&#xA0;javascript:alert('XSS');" />)].each_with_index do |img_hack, i|
define_method "test_should_not_fall_for_xss_image_hack_#{i}" do
assert_equal "<img/>", sanitize_html(img_hack)
end
end
def test_should_sanitize_tag_broken_up_by_null
assert_equal "&lt;scr\357\277\275ipt&gt;alert(\"XSS\")&lt;/scr\357\277\275ipt&gt;", sanitize_html(%(<scr\0ipt>alert(\"XSS\")</scr\0ipt>))
end
def test_should_sanitize_invalid_script_tag
assert_equal "&lt;script XSS=\"\" SRC=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;", sanitize_html(%(<script/XSS SRC="http://ha.ckers.org/xss.js"></script>))
end
def test_should_sanitize_script_tag_with_multiple_open_brackets
assert_equal "&lt;&lt;script&gt;alert(\"XSS\");//&lt;&lt;/script&gt;", sanitize_html(%(<<script>alert("XSS");//<</script>))
assert_equal %(&lt;iframe src=\"http://ha.ckers.org/scriptlet.html\"&gt;&lt;), sanitize_html(%(<iframe src=http://ha.ckers.org/scriptlet.html\n<))
end
def test_should_sanitize_unclosed_script
assert_equal "&lt;script src=\"http://ha.ckers.org/xss.js?\"&gt;<b/>", sanitize_html(%(<script src=http://ha.ckers.org/xss.js?<b>))
end
def test_should_sanitize_half_open_scripts
assert_equal "<img/>", sanitize_html(%(<img src="javascript:alert('XSS')"))
end
def test_should_not_fall_for_ridiculous_hack
img_hack = %(<img\nsrc\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n />)
assert_equal "<img/>", sanitize_html(img_hack)
end
def test_platypus
assert_equal %(<a href=\"http://www.ragingplatypus.com/\" style=\"display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;\">never trust your upstream platypus</a>),
sanitize_html(%(<a href="http://www.ragingplatypus.com/" style="display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;">never trust your upstream platypus</a>))
end
def test_xul
assert_equal %(<p style="">fubar</p>),
sanitize_html(%(<p style="-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')">fubar</p>))
end
def test_input_image
assert_equal %(<input type="image"/>),
sanitize_html(%(<input type="image" src="javascript:alert('XSS');" />))
end
def test_non_alpha_non_digit
assert_equal "&lt;script XSS=\"\" src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;",
sanitize_html(%(<script/XSS src="http://ha.ckers.org/xss.js"></script>))
assert_equal "<a>foo</a>",
sanitize_html('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>')
assert_equal "<img src=\"http://ha.ckers.org/xss.js\"/>",
sanitize_html('<img/src="http://ha.ckers.org/xss.js"/>')
end
def test_img_dynsrc_lowsrc
assert_equal "<img/>",
sanitize_html(%(<img dynsrc="javascript:alert('XSS')" />))
assert_equal "<img/>",
sanitize_html(%(<img lowsrc="javascript:alert('XSS')" />))
end
def test_div_background_image_unicode_encoded
assert_equal '<div style="">foo</div>',
sanitize_html(%(<div style="background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029">foo</div>))
end
def test_div_expression
assert_equal '<div style="">foo</div>',
sanitize_html(%(<div style="width: expression(alert('XSS'));">foo</div>))
end
def test_img_vbscript
assert_equal '<img/>',
sanitize_html(%(<img src='vbscript:msgbox("XSS")' />))
end
end
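
The whitelists these tests iterate over (ALLOWED_ELEMENTS, ALLOWED_ATTRIBUTES, ALLOWED_PROTOCOLS) are ordinary constants on HTMLSanitizer, and the sanitizer is driven the same way outside the test suite. A minimal sketch mirroring the sanitize_html helper defined above (output quoting may differ from the helper, which normalizes quotes):

require 'html5lib/html5parser'
require 'html5lib/sanitizer'

# Inspect the whitelisted URI schemes.
p HTML5lib::HTMLSanitizer::ALLOWED_PROTOCOLS

# The href uses a protocol that is not whitelisted, so it is dropped;
# the title attribute and the element itself survive.
html = %(<a href="javascript:alert('XSS')" title="1">boo</a>)
puts HTML5lib::HTMLParser.parseFragment(html, :tokenizer => HTML5lib::HTMLSanitizer).join('')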

@@ -0,0 +1,78 @@
require File.join(File.dirname(__FILE__), 'preamble')
require 'html5lib/tokenizer'
require 'tokenizer_test_parser'
begin
require 'jsonx'
rescue LoadError
# Minimal fallback when no JSON library is available: rewrite the test
# file into a Ruby literal ('":' becomes '"=>', \uXXXX escapes become
# UTF-8 characters) and eval the result.
class JSON
def self.parse json
json.gsub!(/"\s*:/, '"=>')
json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
eval json
end
end
end
class Html5TokenizerTestCase < Test::Unit::TestCase
def type_of?(token_name, token)
token != 'ParseError' and token_name == token.first
end
def convert_attribute_arrays_to_hashes(tokens)
tokens.inject([]) do |tokens, token|
token[2] = Hash[*token[2].reverse.flatten] if type_of?('StartTag', token)
tokens << token
end
end
def concatenate_consecutive_characters(tokens)
tokens.inject([]) do |tokens, token|
if type_of?('Character', token) and tokens.any? and type_of?('Character', tokens.last)
tokens.last[1] = tokens.last[1] + token[1]
next tokens
end
tokens << token
end
end
def tokenizer_test(data)
(data['contentModelFlags'] || [:PCDATA]).each do |content_model_flag|
message = [
'Description:', data['description'],
'Input:', data['input'],
'Content Model Flag:', content_model_flag ] * "\n"
assert_nothing_raised message do
tokenizer = HTML5lib::HTMLTokenizer.new(data['input'])
tokenizer.contentModelFlag = content_model_flag.to_sym
tokenizer.currentToken = {:type => :startTag, :name => data['lastStartTag']} if data.has_key?('lastStartTag')
tokens = TokenizerTestParser.new(tokenizer).parse
actual = concatenate_consecutive_characters(convert_attribute_arrays_to_hashes(tokens))
expected = concatenate_consecutive_characters(data['output'])
assert_equal expected, actual, message
end
end
end
html5lib_test_files('tokenizer').each do |test_file|
test_name = File.basename(test_file).sub('.test', '')
tests = JSON.parse(File.read(test_file))['tests']
tests.each_with_index do |data, index|
define_method('test_%s_%d' % [test_name, index + 1]) { tokenizer_test data }
end
end
end
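
Each tokenizer fixture is a JSON file with a top-level "tests" array; the keys read above (description, input, output, plus the optional contentModelFlags and lastStartTag) imply a shape like this illustrative case (not copied from an upstream fixture):

{"tests": [
  {"description": "Start tag with one attribute, then text",
   "input": "<h1 class=x>Hi",
   "output": [["StartTag", "h1", {"class": "x"}], ["Character", "Hi"]]}
]}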

@@ -0,0 +1,62 @@
require 'html5lib/constants'
class TokenizerTestParser
def initialize(tokenizer)
@tokenizer = tokenizer
end
def parse
@outputTokens = []
debug = nil
for token in @tokenizer
debug = token.inspect if token[:type] == :ParseError
send ('process' + token[:type].to_s), token
end
return @outputTokens
end
def processDoctype(token)
@outputTokens.push(["DOCTYPE", token[:name], token[:data]])
end
def processStartTag(token)
@outputTokens.push(["StartTag", token[:name], token[:data]])
end
def processEmptyTag(token)
if not HTML5lib::VOID_ELEMENTS.include? token[:name]
@outputTokens.push("ParseError")
end
@outputTokens.push(["StartTag", token[:name], token[:data]])
end
def processEndTag(token)
if token[:data].length > 0
self.processParseError(token)
end
@outputTokens.push(["EndTag", token[:name]])
end
def processComment(token)
@outputTokens.push(["Comment", token[:data]])
end
def processCharacters(token)
@outputTokens.push(["Character", token[:data]])
end
alias processSpaceCharacters processCharacters
def processEOF(token)
end
def processParseError(token)
@outputTokens.push("ParseError")
end
end
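
TokenizerTestParser simply pulls tokens off an HTML5lib::HTMLTokenizer and flattens them into the array / "ParseError" form used in the JSON fixtures. A minimal sketch of driving it directly, assuming the tests directory is on the load path (the preamble arranges that for the suite):

require 'html5lib/tokenizer'
require 'tokenizer_test_parser'

tokenizer = HTML5lib::HTMLTokenizer.new(%(<h1 class="x">Hi))
tokens = TokenizerTestParser.new(tokenizer).parse
# tokens is a list of entries such as ["StartTag", name, attributes] and
# ["Character", text]; consecutive characters may arrive one at a time.
p tokens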