Sync with latest HTML5lib
This commit is contained in:
parent
c2bfdefa57
commit
0ddd422059
25 changed files with 39581 additions and 302 deletions
13
vendor/plugins/HTML5lib/tests/preamble.rb
vendored
13
vendor/plugins/HTML5lib/tests/preamble.rb
vendored
|
@ -1,17 +1,24 @@
|
|||
require 'test/unit'
|
||||
|
||||
HTML5LIB_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__))))
|
||||
HTML5LIB_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__))))
|
||||
|
||||
if File.exists?(File.join(HTML5LIB_BASE, 'testdata'))
|
||||
TESTDATA_DIR = File.join(HTML5LIB_BASE, 'testdata')
|
||||
else
|
||||
TESTDATA_DIR = File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), 'testdata')
|
||||
end
|
||||
|
||||
$:.unshift File.join(File.dirname(File.dirname(__FILE__)),'lib')
|
||||
|
||||
$:.unshift File.dirname(__FILE__)
|
||||
|
||||
def html5lib_test_files(subdirectory)
|
||||
Dir[File.join(HTML5LIB_BASE, 'tests', subdirectory, '*.*')]
|
||||
Dir[File.join(TESTDATA_DIR, subdirectory, '*.*')]
|
||||
end
|
||||
|
||||
begin
|
||||
require 'jsonx'
|
||||
require 'rubygems'
|
||||
require 'json'
|
||||
rescue LoadError
|
||||
class JSON
|
||||
def self.parse json
|
||||
|
|
11
vendor/plugins/HTML5lib/tests/test_encoding.rb
vendored
11
vendor/plugins/HTML5lib/tests/test_encoding.rb
vendored
|
@ -9,13 +9,12 @@ class Html5EncodingTestCase < Test::Unit::TestCase
|
|||
require 'UniversalDetector'
|
||||
|
||||
def test_chardet
|
||||
File.open(File.join(HTML5LIB_BASE, 'tests', 'encoding', 'chardet', 'test_big5.txt')) do |file|
|
||||
stream = HTML5lib::HTMLInputStream.new(file, :chardet => true)
|
||||
assert_equal 'big5', stream.char_encoding.downcase
|
||||
end
|
||||
file = File.open(File.join(TESTDATA_DIR, 'encoding', 'chardet', 'test_big5.txt'), 'r')
|
||||
stream = HTML5lib::HTMLInputStream.new(file, :chardet => true)
|
||||
assert_equal 'big5', stream.char_encoding.downcase
|
||||
rescue LoadError
|
||||
puts "chardet not found, skipping chardet tests"
|
||||
end
|
||||
rescue LoadError
|
||||
puts "chardet not found, skipping chardet tests"
|
||||
end
|
||||
|
||||
html5lib_test_files('encoding').each do |test_file|
|
||||
|
|
2
vendor/plugins/HTML5lib/tests/test_parser.rb
vendored
2
vendor/plugins/HTML5lib/tests/test_parser.rb
vendored
|
@ -54,7 +54,7 @@ class Html5ParserTestCase < Test::Unit::TestCase
|
|||
actual_errors = parser.errors.map do |(line, col), message|
|
||||
'Line: %i Col: %i %s' % [line, col, message]
|
||||
end
|
||||
assert_equal parser.errors.length, expected_errors.length, [
|
||||
assert_equal expected_errors.length, parser.errors.length, [
|
||||
'Expected errors:', expected_errors.join("\n"),
|
||||
'Actual errors:', actual_errors.join("\n")
|
||||
].join("\n")
|
||||
|
|
307
vendor/plugins/HTML5lib/tests/test_sanitizer.rb
vendored
307
vendor/plugins/HTML5lib/tests/test_sanitizer.rb
vendored
|
@ -31,14 +31,14 @@ class SanitizeTest < Test::Unit::TestCase
|
|||
:omit_optional_tags => false,
|
||||
:inject_meta_charset => false,
|
||||
:sanitize => true}).gsub(/^<div xmlns='http:\/\/www.w3.org\/1999\/xhtml'>(.*)<\/div>$/, '\1')
|
||||
rescue
|
||||
return "Ill-formed XHTML!"
|
||||
rescue
|
||||
return "Ill-formed XHTML!"
|
||||
end
|
||||
|
||||
def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
|
||||
assert_equal htmloutput, sanitize_html(input)
|
||||
assert_equal xhtmloutput, sanitize_xhtml(input)
|
||||
assert_equal rexmloutput, sanitize_rexml(input)
|
||||
assert_equal htmloutput, sanitize_html(input)
|
||||
assert_equal xhtmloutput, sanitize_xhtml(input)
|
||||
assert_equal rexmloutput, sanitize_rexml(input)
|
||||
end
|
||||
|
||||
HTMLSanitizer::ALLOWED_ELEMENTS.each do |tag_name|
|
||||
|
@ -113,191 +113,6 @@ class SanitizeTest < Test::Unit::TestCase
|
|||
end
|
||||
end
|
||||
|
||||
def test_should_allow_anchors
|
||||
input = "<a href='foo' onclick='bar'><script>baz</script></a>"
|
||||
output = "<a href='foo'><script>baz</script></a>"
|
||||
check_sanitization(input, output, output, output)
|
||||
end
|
||||
|
||||
# RFC 3986, sec 4.2
|
||||
def test_allow_colons_in_path_component
|
||||
input = "<a href=\"./this:that\">foo</a>"
|
||||
output = "<a href='./this:that'>foo</a>"
|
||||
check_sanitization(input, output, output, output)
|
||||
end
|
||||
|
||||
%w(src width height alt).each do |img_attr|
|
||||
define_method "test_should_allow_image_#{img_attr}_attribute" do
|
||||
input = "<img #{img_attr}='foo' onclick='bar' />"
|
||||
output = "<img #{img_attr}='foo'/>"
|
||||
rexmloutput = "<img #{img_attr}='foo' />"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
end
|
||||
|
||||
def test_should_handle_non_html
|
||||
input = 'abc'
|
||||
output = 'abc'
|
||||
check_sanitization(input, output, output, output)
|
||||
end
|
||||
|
||||
def test_should_handle_blank_text
|
||||
input = ''
|
||||
output = ''
|
||||
check_sanitization(input, output, output, output)
|
||||
end
|
||||
|
||||
[%w(img src), %w(a href)].each do |(tag, attr)|
|
||||
close = VOID_ELEMENTS.include?(tag) ? "/>boo" : ">boo</#{tag}>"
|
||||
xclose = VOID_ELEMENTS.include?(tag) ? " />" : ">boo</#{tag}>"
|
||||
|
||||
input = %(<#{tag} #{attr}="javascript:XSS" title="1">boo</#{tag}>)
|
||||
output = %(<#{tag} title='1'#{close})
|
||||
rexmloutput = %(<#{tag} title='1'#{xclose})
|
||||
define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols_and_whitespace" do
|
||||
input = %(<#{tag} #{attr}=" javascript:XSS" title="1">boo</#{tag}>)
|
||||
output = %(<#{tag} title='1'#{close})
|
||||
rexmloutput = %(<#{tag} title='1'#{xclose})
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
end
|
||||
|
||||
[%(<img src="javascript:alert('XSS');" />),
|
||||
%(<img src=javascript:alert('XSS') />),
|
||||
%(<img src="JaVaScRiPt:alert('XSS')" />),
|
||||
%(<img src='javascript:alert("XSS")' />),
|
||||
%(<img src='javascript:alert(String.fromCharCode(88,83,83))' />),
|
||||
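%(<img src='&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;' />),
|
||||
%(<img src='&#0000106;&#0000097;&#0000118;&#0000097;&#0000115;&#0000099;&#0000114;&#0000105;&#0000112;&#0000116;&#0000058;&#0000097;&#0000108;&#0000101;&#0000114;&#0000116;&#0000040;&#0000039;&#0000088;&#0000083;&#0000083;&#0000039;&#0000041;' />),
|
||||
%(<img src='&#x6A;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3A;&#x61;&#x6C;&#x65;&#x72;&#x74;&#x28;&#x27;&#x58;&#x53;&#x53;&#x27;&#x29;' />),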
|
||||
%(<img src="jav\tascript:alert('XSS');" />),
|
||||
%(<img src="jav&#x09;ascript:alert('XSS');" />),
|
||||
%(<img src="jav&#x0A;ascript:alert('XSS');" />),
|
||||
%(<img src="jav&#x0D;ascript:alert('XSS');" />),
|
||||
%(<img src="  javascript:alert('XSS');" />),
|
||||
%(<img src=" javascript:alert('XSS');" />),
|
||||
%(<img src=" javascript:alert('XSS');" />)].each_with_index do |img_hack, i|
|
||||
define_method "test_should_not_fall_for_xss_image_hack_#{i}" do
|
||||
output = "<img/>"
|
||||
rexmloutput = "<img />"
|
||||
rexmloutput = "Ill-formed XHTML!" if i == 1
|
||||
check_sanitization(img_hack, output, output, rexmloutput)
|
||||
end
|
||||
end
|
||||
|
||||
def test_should_sanitize_tag_broken_up_by_null
|
||||
input = %(<scr\0ipt>alert(\"XSS\")</scr\0ipt>)
|
||||
output = "<scr\357\277\275ipt>alert(\"XSS\")</scr\357\277\275ipt>"
|
||||
rexmloutput = "Ill-formed XHTML!"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
def test_should_sanitize_invalid_script_tag
|
||||
input = %(<script/XSS SRC="http://ha.ckers.org/xss.js"></script>)
|
||||
output = "<script XSS=\"\" SRC=\"http://ha.ckers.org/xss.js\"></script>"
|
||||
rexmloutput = "Ill-formed XHTML!"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
def test_should_sanitize_script_tag_with_multiple_open_brackets
|
||||
input = %(<<script>alert("XSS");//<</script>)
|
||||
output = "<<script>alert(\"XSS\");//<</script>"
|
||||
rexmloutput = "Ill-formed XHTML!"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
|
||||
input = %(<iframe src=http://ha.ckers.org/scriptlet.html\n<)
|
||||
output = %(<iframe src=\"http://ha.ckers.org/scriptlet.html\"><)
|
||||
rexmloutput = "Ill-formed XHTML!"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
def test_should_sanitize_unclosed_script
|
||||
input = %(<script src=http://ha.ckers.org/xss.js?<b>)
|
||||
output = "<script src=\"http://ha.ckers.org/xss.js?\"><b/>"
|
||||
rexmloutput = "Ill-formed XHTML!"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
def test_should_sanitize_half_open_scripts
|
||||
input = %(<img src="javascript:alert('XSS')")
|
||||
output = "<img/>"
|
||||
rexmloutput = "Ill-formed XHTML!"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
def test_should_not_fall_for_ridiculous_hack
|
||||
img_hack = %(<img\nsrc\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n />)
|
||||
output = "<img/>"
|
||||
rexmloutput = "<img />"
|
||||
check_sanitization(img_hack, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
def test_platypus
|
||||
input = %(<a href="http://www.ragingplatypus.com/" style="display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;">never trust your upstream platypus</a>)
|
||||
output = %(<a href='http://www.ragingplatypus.com/' style='display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;'>never trust your upstream platypus</a>)
|
||||
check_sanitization(input, output, output, output)
|
||||
end
|
||||
|
||||
def test_xul
|
||||
input = %(<p style="-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')">fubar</p>)
|
||||
output = %(<p style=''>fubar</p>)
|
||||
check_sanitization(input, output, output, output)
|
||||
end
|
||||
|
||||
def test_input_image
|
||||
input = %(<input type="image" src="javascript:alert('XSS');" />)
|
||||
output = %(<input type='image'/>)
|
||||
rexmloutput = %(<input type='image' />)
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
def test_non_alpha_non_digit
|
||||
input = %(<script/XSS src="http://ha.ckers.org/xss.js"></script>)
|
||||
output = "<script XSS=\"\" src=\"http://ha.ckers.org/xss.js\"></script>"
|
||||
rexmloutput = "Ill-formed XHTML!"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
|
||||
input = '<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>'
|
||||
output = "<a>foo</a>"
|
||||
rexmloutput = "Ill-formed XHTML!"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
|
||||
input = '<img/src="http://ha.ckers.org/xss.js"/>'
|
||||
output = "<img src='http://ha.ckers.org/xss.js'/>"
|
||||
rexmloutput = "Ill-formed XHTML!"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
def test_img_dynsrc_lowsrc
|
||||
input = %(<img dynsrc="javascript:alert('XSS')" />)
|
||||
output = "<img/>"
|
||||
rexmloutput = "<img />"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
def test_div_background_image_unicode_encoded
|
||||
input = %(<div style="background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029">foo</div>)
|
||||
output = "<div style=''>foo</div>"
|
||||
check_sanitization(input, output, output, output)
|
||||
end
|
||||
|
||||
def test_div_expression
|
||||
input = %(<div style="width: expression(alert('XSS'));">foo</div>)
|
||||
output = "<div style=''>foo</div>"
|
||||
check_sanitization(input, output, output, output)
|
||||
end
|
||||
|
||||
def test_img_vbscript
|
||||
input = %(<img src='vbscript:msgbox("XSS")' />)
|
||||
output = '<img/>'
|
||||
rexmloutput = '<img />'
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
def test_should_handle_astral_plane_characters
|
||||
input = "<p>𝒵 𝔸</p>"
|
||||
output = "<p>\360\235\222\265 \360\235\224\270</p>"
|
||||
|
@ -308,67 +123,6 @@ class SanitizeTest < Test::Unit::TestCase
|
|||
check_sanitization(input, output, output, output)
|
||||
end
|
||||
|
||||
def test_should_handle_malformed_image_tags
|
||||
input = %(<img """><script>alert("XSS")</script>">)
|
||||
output = "<img/><script>alert(\"XSS\")</script>\">"
|
||||
rexmloutput = "Ill-formed XHTML!"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
def test_non_alpha_non_digit_II
|
||||
input = %(<a href!#\$%&()*~+-_.,:;?@[/|\]^`=alert('XSS')>foo</a>)
|
||||
output = "<a>foo</a>"
|
||||
rexmloutput = "Ill-formed XHTML!"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
def test_non_alpha_non_digit_III
|
||||
input = %(<a/href="javascript:alert('XSS');">foo</a>)
|
||||
output = "<a>foo</a>"
|
||||
rexmloutput = "Ill-formed XHTML!"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
def test_no_closing_script_tags
|
||||
input = %(<script src=http://ha.ckers.org/xss.js?<b>)
|
||||
output = "<script src=\"http://ha.ckers.org/xss.js?\"><b/>"
|
||||
rexmloutput = "Ill-formed XHTML!"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
def test_protocol_resolution_in_script_tag
|
||||
input = %(<script src=//ha.ckers.org/.j></script>)
|
||||
output = "<script src=\"//ha.ckers.org/.j\"></script>"
|
||||
rexmloutput = "Ill-formed XHTML!"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
def test_double_open_angle_brackets
|
||||
input = %(<img src=http://ha.ckers.org/scriptlet.html <)
|
||||
output = "<img src='http://ha.ckers.org/scriptlet.html'/><"
|
||||
rexmloutput = "Ill-formed XHTML!"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
|
||||
input = %(<script src=http://ha.ckers.org/scriptlet.html <)
|
||||
output = "<script src=\"http://ha.ckers.org/scriptlet.html\"><"
|
||||
rexmloutput = "Ill-formed XHTML!"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
def test_background_attribute
|
||||
input = %(<div background="javascript:alert('XSS')"></div>)
|
||||
output = "<div/>"
|
||||
xhtmloutput = "<div></div>"
|
||||
check_sanitization(input, output, xhtmloutput, xhtmloutput)
|
||||
end
|
||||
|
||||
def test_bgsound
|
||||
input = %(<bgsound src="javascript:alert('XSS');" />)
|
||||
output = "<bgsound src=\"javascript:alert('XSS');\"/>"
|
||||
rexmloutput = "<bgsound src=\"javascript:alert('XSS');\"></bgsound>"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
# This affects only NS4. Is it worth fixing?
|
||||
# def test_javascript_includes
|
||||
# input = %(<div size="&{alert('XSS')}">foo</div>)
|
||||
|
@ -376,45 +130,16 @@ class SanitizeTest < Test::Unit::TestCase
|
|||
# check_sanitization(input, output, output, output)
|
||||
# end
|
||||
|
||||
def test_link_stylesheets
|
||||
input =%(<link rel="stylesheet" href="javascript:alert('XSS');" />)
|
||||
output = "<link rel=\"stylesheet\" href=\"javascript:alert('XSS');\"/>"
|
||||
rexmloutput = "<link href=\"javascript:alert('XSS');\" rel=\"stylesheet\"/>"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
|
||||
input =%(<link rel="stylesheet" href="http://ha.ckers.org/xss.css" />)
|
||||
output = "<link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\"/>"
|
||||
rexmloutput = "<link href=\"http://ha.ckers.org/xss.css\" rel=\"stylesheet\"/>"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
def test_list_style_image
|
||||
input = %(<li style="list-style-image: url\(javascript:alert\('XSS'\)\)">foo</li>)
|
||||
output = "<li style=''>foo</li>"
|
||||
check_sanitization(input, output, output, output)
|
||||
end
|
||||
|
||||
def test_IE_Comments
|
||||
input = %(<!--[if gte IE 4]><script>alert\('XSS'\);</script><![endif]-->)
|
||||
output = ""
|
||||
check_sanitization(input, output, output, output)
|
||||
|
||||
input = %(<![if !IE 5]><script>alert\('XSS'\);</script><![endif]>)
|
||||
output = "<script>alert('XSS');</script>"
|
||||
rexmloutput = "Ill-formed XHTML!"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
end
|
||||
|
||||
def test_xml_base
|
||||
input = %(<div xml:base="javascript:alert('XSS');//">foo</div>)
|
||||
output = "<div>foo</div>"
|
||||
check_sanitization(input, output, output, output)
|
||||
end
|
||||
|
||||
def test_grave_accents
|
||||
input =%(<img src=`javascript:alert('XSS')` />)
|
||||
output = "<img/>"
|
||||
rexmloutput = "Ill-formed XHTML!"
|
||||
check_sanitization(input, output, output, rexmloutput)
|
||||
html5lib_test_files('sanitizer').each do |filename|
|
||||
JSON::parse(open(filename).read).each do |test|
|
||||
define_method "test_#{test['name']}" do
|
||||
check_sanitization(
|
||||
test['input'],
|
||||
test['output'],
|
||||
test['xhtml'] || test['output'],
|
||||
test['rexml'] || test['output']
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue