Minor S5 tweaks and Sync with Latest HTML5lib

parent dbed460843
commit 81d3cdc8e4

81 changed files with 9887 additions and 1687 deletions

vendor/plugins/HTML5lib/tests/preamble.rb (vendored, 15 lines changed)
@@ -16,19 +16,8 @@ def html5_test_files(subdirectory)
   Dir[File.join(TESTDATA_DIR, subdirectory, '*.*')]
 end
 
-begin
-  require 'rubygems'
-  require 'json'
-rescue LoadError
-  class JSON
-    def self.parse json
-      json.gsub!(/"\s*:/, '"=>')
-      json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
-      null = nil
-      eval json
-    end
-  end
-end
+require 'rubygems'
+require 'json'
 
 module HTML5
   module TestSupport
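The deleted begin/rescue block emulated JSON.parse with gsub and eval when the json gem was missing; the preamble now simply requires the gem. A minimal sketch of what the test suites rely on after this change (the fixture content below is made up for illustration):

    require 'rubygems'
    require 'json'

    # Illustrative fixture; the real suites read *.test files under TESTDATA_DIR.
    tests = JSON.parse('[{"input": "<!DOCTYPE html><p>hi</p>", "type": "text/html"}]')
    tests.each { |t| puts "#{t['type']}: #{t['input']}" }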

vendor/plugins/HTML5lib/tests/test_lxp.rb (vendored, 18 lines changed)
@@ -6,7 +6,7 @@ XMLELEM = /<(\w+\s*)((?:[-:\w]+="[^"]*"\s*)+)(\/?)>/
 
 def assert_xml_equal(input, expected=nil, parser=HTML5::XMLParser)
   sortattrs = proc {"<#{$1+$2.split.sort.join(' ')+$3}>"}
-  document = parser.parse(input.chomp).root
+  document = parser.parse(input.chomp, :lowercase_attr_name => false, :lowercase_element_name => false).root
   if not expected
     expected = input.chomp.gsub(XMLELEM,&sortattrs)
     expected = expected.gsub(/&#(\d+);/) {[$1.to_i].pack('U')}

@@ -257,6 +257,22 @@ EOX1
 <head><title>PROLOG</title></head>
 <body>
 </body></html>
 EOX2
   end
+
+  def test_tagsoup
+    assert_xhtml_equal <<EOX1, <<EOX2.strip
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>TAGSOUP</title></head>
+<body>
+<u><blockquote><p></u>
+</body></html>
+EOX1
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>TAGSOUP</title></head>
+<body>
+<u/><blockquote><u/><p><u/>
+</p></blockquote></body></html>
+EOX2
+  end
 
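The assert helper now parses with :lowercase_attr_name => false and :lowercase_element_name => false so the round-trip comparison preserves the case of the input markup, and the new test_tagsoup case checks how mis-nested inline tags are rebalanced. A rough sketch of the comparison idea, assuming HTML5::XMLParser accepts the options exactly as the diff shows (the test file's own requires are not visible in this hunk):

    require 'html5'

    options = {:lowercase_attr_name => false, :lowercase_element_name => false}
    root = HTML5::XMLParser.parse("<p CLASS='x'>tag soup</p>".chomp, options).root
    # With lowercasing disabled, element and attribute names survive as written,
    # so the reserialized tree can be compared against the original string.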

vendor/plugins/HTML5lib/tests/test_parser.rb (vendored, 24 lines changed)
@@ -26,8 +26,9 @@ class Html5ParserTestCase < Test::Unit::TestCase
     test_name = File.basename(test_file).sub('.dat', '')
 
     TestData.new(test_file, %w(data errors document-fragment document)).
-      each_with_index do |(input, errors, innerHTML, expected), index|
+      each_with_index do |(input, errors, inner_html, expected), index|
+
       errors = errors.split("\n")
       expected = expected.gsub("\n| ","\n")[2..-1]
 
       $tree_types_to_test.each do |tree_name|

@@ -35,8 +36,8 @@ class Html5ParserTestCase < Test::Unit::TestCase
 
         parser = HTMLParser.new(:tree => TreeBuilders[tree_name])
 
-        if innerHTML
-          parser.parseFragment(input, innerHTML)
+        if inner_html
+          parser.parse_fragment(input, inner_html)
         else
           parser.parse(input)
         end

@@ -49,16 +50,15 @@ class Html5ParserTestCase < Test::Unit::TestCase
           '', 'Recieved:', actual_output
         ].join("\n")
 
         if $CHECK_PARSER_ERRORS
-          actual_errors = parser.errors.map do |(line, col), message|
-            'Line: %i Col: %i %s' % [line, col, message]
-          end
-          assert_equal errors.length, parser.errors.length, [
-            'Input', input + "\n",
-            'Expected errors:', errors.join("\n"),
-            'Actual errors:', actual_errors.join("\n")
-          ].join("\n")
+          actual_errors = parser.errors.map do |(line, col), message|
+            'Line: %i Col: %i %s' % [line, col, message]
+          end
+          assert_equal errors.length, parser.errors.length, [
+            '', 'Input', input,
+            '', "Expected errors (#{errors.length}):", errors.join("\n"),
+            '', "Actual errors (#{actual_errors.length}):",
+            actual_errors.join("\n")
+          ].join("\n")
 
         end
       end
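Apart from the more detailed failure message, the change here is the rename of the fragment-parsing API from camelCase to snake_case: parseFragment and innerHTML become parse_fragment and inner_html. A sketch of the call the tests now make, using the API names visible in this diff ('simpletree' is an assumed tree-builder name; the suite iterates over $tree_types_to_test):

    require 'html5'
    include HTML5

    parser = HTMLParser.new(:tree => TreeBuilders['simpletree'])
    # Fragment parsing takes the markup plus the name of the enclosing element.
    parser.parse_fragment("<td>cell</td>", 'tr')
    parser.errors.each do |(line, col), message|
      puts 'Line: %i Col: %i %s' % [line, col, message]
    end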

vendor/plugins/HTML5lib/tests/test_sanitizer.rb (vendored, 10 lines changed)
@@ -12,17 +12,17 @@ class SanitizeTest < Test::Unit::TestCase
   include HTML5
 
   def sanitize_xhtml stream
-    XHTMLParser.parseFragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8'}).to_s
+    XHTMLParser.parse_fragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8', :lowercase_element_name => false, :lowercase_attr_name => false}).to_s
   end
 
   def sanitize_html stream
-    HTMLParser.parseFragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8'}).to_s
+    HTMLParser.parse_fragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8', :lowercase_element_name => false, :lowercase_attr_name => false}).to_s
   end
 
   def sanitize_rexml stream
     require 'rexml/document'
     doc = REXML::Document.new("<div xmlns='http://www.w3.org/1999/xhtml'>#{stream}</div>")
-    tokens = TreeWalkers.getTreeWalker('rexml').new(doc)
+    tokens = TreeWalkers.get_tree_walker('rexml').new(doc)
     XHTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
       :quote_char => "'",
       :inject_meta_charset => false,

@@ -39,8 +39,8 @@ class SanitizeTest < Test::Unit::TestCase
 
   HTMLSanitizer::ALLOWED_ELEMENTS.each do |tag_name|
     define_method "test_should_allow_#{tag_name}_tag" do
-      input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
-      htmloutput = "<#{tag_name.downcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.downcase}>"
+      input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
+      htmloutput = "<#{tag_name.downcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.downcase}>"
       xhtmloutput = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
       rexmloutput = xhtmloutput
 
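Besides switching to parse_fragment and get_tree_walker, the sanitizer helpers now disable element and attribute lowercasing so the mixed-case allow-list tests compare against the sanitizer's literal output. A hedged sketch of the path these helpers exercise:

    require 'html5'
    include HTML5

    dirty = "<p onclick='alert(1)'>hi <script>bad()</script></p>"
    clean = HTMLParser.parse_fragment(dirty,
              :tokenizer => HTMLSanitizer,
              :encoding => 'utf-8',
              :lowercase_element_name => false,
              :lowercase_attr_name => false).to_s
    # The sanitizing tokenizer filters markup that is not on HTMLSanitizer's
    # allow lists (e.g. ALLOWED_ELEMENTS) before the tree is built.
    puts clean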

@@ -12,17 +12,17 @@ class JsonWalker < HTML5::TreeWalkers::Base
     @tree.each do |token|
       case token[0]
       when 'StartTag'
-        yield startTag(token[1], token[2])
+        yield start_tag(token[1], token[2])
       when 'EndTag'
-        yield endTag(token[1])
+        yield end_tag(token[1])
       when 'EmptyTag'
-        yield emptyTag(token[1], token[2])
+        yield empty_tag(token[1], token[2])
       when 'Comment'
         yield comment(token[1])
       when 'Characters', 'SpaceCharacters'
         text(token[1]) {|textToken| yield textToken}
       when 'Doctype'
-        yield doctype(token[1])
+        yield doctype(token[1], token[2], token[3])
       else
         raise "Unknown token type: " + token[0]
       end
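This walker replays a JSON-encoded token list; its handler calls are renamed to snake_case, and doctype tokens now pass the public and system identifiers (token[2], token[3]) along with the name. A sketch of the token-array shape the case statement above expects (the values and the attribute representation are illustrative, not taken from the fixtures):

    tokens = [
      ["Doctype", "html", nil, nil],          # name, public id, system id
      ["StartTag", "p", {"class" => "x"}],    # name, attributes
      ["Characters", "hello"],
      ["EndTag", "p"],
      ["Comment", " done "]
    ]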

vendor/plugins/HTML5lib/tests/test_sniffer.rb (vendored, new file, 27 lines)
@@ -0,0 +1,27 @@
+require File.join(File.dirname(__FILE__), 'preamble')
+require "html5/sniffer"
+
+class TestFeedTypeSniffer < Test::Unit::TestCase
+  include HTML5
+  include TestSupport
+  include Sniffer
+
+  html5_test_files('sniffer').each do |test_file|
+    test_name = File.basename(test_file).sub('.test', '')
+
+    tests = JSON.parse(File.read(test_file))
+
+    tests.each_with_index do |data, index|
+      define_method('test_%s_%d' % [test_name, index + 1]) do
+        assert_equal data['type'], html_or_feed(data['input'])
+      end
+    end
+  end
+  # each_with_index do |t, i|
+  #   define_method "test_#{i}" do
+  #     assert_equal t[0], sniff_feed_type(t[1])
+  #   end
+  # end
+
+
+end
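The new suite defines one test method per entry in each JSON fixture under the sniffer test data directory and compares html_or_feed against the expected MIME type. A hypothetical fixture entry in that shape (the actual fixture files are not part of this diff, and the Sniffer module is assumed to live under HTML5 as the includes above suggest):

    require 'rubygems'
    require 'json'
    require 'html5/sniffer'
    include HTML5::Sniffer

    fixture = <<-'JSON'
      [{"input": "<!DOCTYPE html><title>page</title>", "type": "text/html"}]
    JSON

    JSON.parse(fixture).each do |data|
      # html_or_feed applies HTML-vs-feed content sniffing to the raw markup.
      puts(html_or_feed(data['input']) == data['type'])
    end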

vendor/plugins/HTML5lib/tests/test_tokenizer.rb (vendored, 33 lines changed)
@@ -6,6 +6,33 @@ require 'tokenizer_test_parser'
 
 class Html5TokenizerTestCase < Test::Unit::TestCase
 
+  def assert_tokens_match(expectedTokens, receivedTokens, ignoreErrorOrder, message)
+    if !ignoreErrorOrder
+      return expectedTokens == receivedTokens
+    else
+      #Sort the tokens into two groups; non-parse errors and parse errors
+      expected = [[],[]]
+      received = [[],[]]
+
+      for token in expectedTokens
+        if token != "ParseError"
+          expected[0] << token
+        else
+          expected[1] << token
+        end
+      end
+
+      for token in receivedTokens
+        if token != "ParseError"
+          received[0] << token
+        else
+          received[1] << token
+        end
+      end
+      assert_equal expected, received, message
+    end
+  end
+
   def type_of?(token_name, token)
     token != 'ParseError' and token_name == token.first
   end

@@ -38,9 +65,9 @@ class Html5TokenizerTestCase < Test::Unit::TestCase
       assert_nothing_raised message do
         tokenizer = HTML5::HTMLTokenizer.new(data['input'])
 
-        tokenizer.contentModelFlag = content_model_flag.to_sym
+        tokenizer.content_model_flag = content_model_flag.to_sym
 
-        tokenizer.currentToken = {:type => :startTag, :name => data['lastStartTag']} if data.has_key?('lastStartTag')
+        tokenizer.current_token = {:type => :startTag, :name => data['lastStartTag']} if data.has_key?('lastStartTag')
 
         tokens = TokenizerTestParser.new(tokenizer).parse
 

@@ -48,7 +75,7 @@ class Html5TokenizerTestCase < Test::Unit::TestCase
 
         expected = concatenate_consecutive_characters(data['output'])
 
-        assert_equal expected, actual, message
+        assert_tokens_match expected, actual, data["ignoreErrorOrder"], message
       end
     end
   end
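The new assert_tokens_match helper lets a case pass when the expected and received streams contain the same tokens and the same number of ParseErrors even if the errors are interleaved differently: with ignoreErrorOrder set, it partitions each stream into a non-error group and an error group before comparing. Roughly:

    expected = [["Character", "a"], "ParseError", ["StartTag", "b", {}]]
    received = ["ParseError", ["Character", "a"], ["StartTag", "b", {}]]
    # Both partition to [[["Character", "a"], ["StartTag", "b", {}]], ["ParseError"]],
    # so assert_tokens_match(expected, received, true, 'msg') passes despite the reordering.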

@@ -60,7 +60,11 @@ class TestTreeWalkers < Test::Unit::TestCase
       when :Comment
         output << "#{' '*indent}<!-- #{token[:data]} -->"
       when :Doctype
-        output << "#{' '*indent}<!DOCTYPE #{token[:name]}>"
+        if token[:name] and token[:name].any?
+          output << "#{' '*indent}<!DOCTYPE #{token[:name]}>"
+        else
+          output << "#{' '*indent}<!DOCTYPE >"
+        end
       when :Characters, :SpaceCharacters
         output << "#{' '*indent}\"#{token[:data]}\""
       else

@@ -76,7 +80,7 @@ class TestTreeWalkers < Test::Unit::TestCase
     next if test_name == 'tests5' # TODO
 
     TestData.new(test_file, %w(data errors document-fragment document)).
-      each_with_index do |(input, errors, innerHTML, expected), index|
+      each_with_index do |(input, errors, inner_html, expected), index|
 
       expected = expected.gsub("\n| ","\n")[2..-1]
 

@@ -86,13 +90,13 @@ class TestTreeWalkers < Test::Unit::TestCase
 
       parser = HTML5::HTMLParser.new(:tree => tree_class[:builder])
 
-      if innerHTML
-        parser.parseFragment(input, innerHTML)
+      if inner_html
+        parser.parse_fragment(input, inner_html)
       else
        parser.parse(input)
      end
 
-      document = parser.tree.getDocument
+      document = parser.tree.get_document
 
      begin
        output = sortattrs(convertTokens(tree_class[:walker].new(document)))
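The tree-walker tests pick up the same snake_case renames (parse_fragment, inner_html, get_document) and now emit a bare <!DOCTYPE > when a doctype token has no name. For the walker side, a hedged sketch that mirrors the sanitize_rexml helper shown earlier in this commit, feeding a REXML tree through the registered 'rexml' walker and the XHTML serializer:

    require 'rexml/document'
    require 'html5'
    include HTML5

    doc = REXML::Document.new("<div xmlns='http://www.w3.org/1999/xhtml'><p>one</p></div>")
    tokens = TreeWalkers.get_tree_walker('rexml').new(doc)
    # Serializer options as used in test_sanitizer.rb above.
    puts XHTMLSerializer.serialize(tokens, {:encoding => 'utf-8', :quote_char => "'"})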

@@ -54,7 +54,7 @@ class TokenizerTestParser
     @outputTokens.push(["Character", token[:data]])
   end
 
-  def processEOF(token)
+  def process_eof(token)
   end
 
   def processParseError(token)