Updated to Latest HTML5lib
Synced with latest HTML5lib. Added some RDoc-compatible documentation to the sanitizer.
This commit is contained in:
parent
8badd0766a
commit
3bf560c3b3
7 changed files with 127 additions and 93 deletions
|
@ -12,11 +12,11 @@ class SanitizeTest < Test::Unit::TestCase
|
|||
include HTML5lib
|
||||
|
||||
def sanitize_xhtml stream
|
||||
XHTMLParser.parseFragment(stream, :tokenizer => HTMLSanitizer).join('').gsub(/'/,'"')
|
||||
XHTMLParser.parseFragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8'}).join('').gsub(/'/,'"')
|
||||
end
|
||||
|
||||
def sanitize_html stream
|
||||
HTMLParser.parseFragment(stream, :tokenizer => HTMLSanitizer).join('').gsub(/'/,'"')
|
||||
HTMLParser.parseFragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8'}).join('').gsub(/'/,'"')
|
||||
end
|
||||
|
||||
def sanitize_rexml stream
|
||||
|
@ -259,5 +259,9 @@ class SanitizeTest < Test::Unit::TestCase
|
|||
sanitize_html("<p>𝒵 𝔸</p>")
|
||||
assert_equal "<p>\360\235\222\265 \360\235\224\270</p>",
|
||||
sanitize_rexml("<p>𝒵 𝔸</p>")
|
||||
assert_equal "<p><tspan>\360\235\224\270</tspan> a</p>",
|
||||
sanitize_html("<p><tspan>\360\235\224\270</tspan> a</p>")
|
||||
assert_equal "<p><tspan>\360\235\224\270</tspan> a</p>",
|
||||
sanitize_rexml("<p><tspan>\360\235\224\270</tspan> a</p>")
|
||||
end
|
||||
end
|
||||
|
|
26
vendor/plugins/HTML5lib/tests/test_stream.rb
vendored
26
vendor/plugins/HTML5lib/tests/test_stream.rb
vendored
|
@ -6,7 +6,7 @@ class HTMLInputStreamTest < Test::Unit::TestCase
|
|||
include HTML5lib
|
||||
|
||||
def test_char_ascii
|
||||
stream = HTMLInputStream.new("'")
|
||||
stream = HTMLInputStream.new("'", :encoding=>'ascii')
|
||||
assert_equal('ascii', stream.char_encoding)
|
||||
assert_equal("'", stream.char)
|
||||
end
|
||||
|
@ -17,11 +17,21 @@ class HTMLInputStreamTest < Test::Unit::TestCase
|
|||
end
|
||||
|
||||
def test_char_utf8
|
||||
stream = HTMLInputStream.new("\xe2\x80\x98")
|
||||
stream = HTMLInputStream.new("\xe2\x80\x98", :encoding=>'utf-8')
|
||||
assert_equal('utf-8', stream.char_encoding)
|
||||
assert_equal("\xe2\x80\x98", stream.char)
|
||||
end
|
||||
|
||||
def test_char_win1252
|
||||
stream = HTMLInputStream.new("\xa2\xc5\xf1\x92\x86")
|
||||
assert_equal('windows-1252', stream.char_encoding)
|
||||
assert_equal("\xc2\xa2", stream.char)
|
||||
assert_equal("\xc3\x85", stream.char)
|
||||
assert_equal("\xc3\xb1", stream.char)
|
||||
assert_equal("\xe2\x80\x99", stream.char)
|
||||
assert_equal("\xe2\x80\xa0", stream.char)
|
||||
end
|
||||
|
||||
def test_bom
|
||||
stream = HTMLInputStream.new("\xef\xbb\xbf" + "'")
|
||||
assert_equal('utf-8', stream.char_encoding)
|
||||
|
@ -31,12 +41,6 @@ class HTMLInputStreamTest < Test::Unit::TestCase
|
|||
begin
|
||||
require 'iconv'
|
||||
|
||||
def test_char_win1252
|
||||
stream = HTMLInputStream.new("\x91")
|
||||
assert_equal('windows-1252', stream.char_encoding)
|
||||
assert_equal("\xe2\x80\x98", stream.char)
|
||||
end
|
||||
|
||||
def test_utf_16
|
||||
stream = HTMLInputStream.new("\xff\xfe" + " \x00"*1025)
|
||||
assert(stream.char_encoding, 'utf-16-le')
|
||||
|
@ -51,10 +55,10 @@ class HTMLInputStreamTest < Test::Unit::TestCase
|
|||
assert_equal(0, stream.instance_eval {@tell})
|
||||
assert_equal("a\nbb\n", stream.chars_until('c'))
|
||||
assert_equal(6, stream.instance_eval {@tell})
|
||||
assert_equal([3,1], stream.position)
|
||||
assert_equal([3,0], stream.position)
|
||||
assert_equal("ccc\ndddd", stream.chars_until('x'))
|
||||
assert_equal(14, stream.instance_eval {@tell})
|
||||
assert_equal([4,5], stream.position)
|
||||
assert_equal([0,1,4,8], stream.instance_eval {@new_lines})
|
||||
assert_equal([4,4], stream.position)
|
||||
assert_equal([0,1,5,9], stream.instance_eval {@new_lines})
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue