Minor S5 tweaks and Sync with Latest HTML5lib

This commit is contained in:
Jacques Distler 2007-08-30 12:19:10 -05:00
parent dbed460843
commit 81d3cdc8e4
81 changed files with 9887 additions and 1687 deletions

View file

@ -16,19 +16,8 @@ def html5_test_files(subdirectory)
Dir[File.join(TESTDATA_DIR, subdirectory, '*.*')]
end
begin
require 'rubygems'
require 'json'
rescue LoadError
class JSON
def self.parse json
json.gsub!(/"\s*:/, '"=>')
json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
null = nil
eval json
end
end
end
require 'rubygems'
require 'json'
module HTML5
module TestSupport

View file

@ -6,7 +6,7 @@ XMLELEM = /<(\w+\s*)((?:[-:\w]+="[^"]*"\s*)+)(\/?)>/
def assert_xml_equal(input, expected=nil, parser=HTML5::XMLParser)
sortattrs = proc {"<#{$1+$2.split.sort.join(' ')+$3}>"}
document = parser.parse(input.chomp).root
document = parser.parse(input.chomp, :lowercase_attr_name => false, :lowercase_element_name => false).root
if not expected
expected = input.chomp.gsub(XMLELEM,&sortattrs)
expected = expected.gsub(/&#(\d+);/) {[$1.to_i].pack('U')}
@ -257,6 +257,22 @@ EOX1
<head><title>PROLOG</title></head>
<body>
</body></html>
EOX2
end
def test_tagsoup
assert_xhtml_equal <<EOX1, <<EOX2.strip
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>TAGSOUP</title></head>
<body>
<u><blockquote><p></u>
</body></html>
EOX1
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>TAGSOUP</title></head>
<body>
<u/><blockquote><u/><p><u/>
</p></blockquote></body></html>
EOX2
end

View file

@ -26,8 +26,9 @@ class Html5ParserTestCase < Test::Unit::TestCase
test_name = File.basename(test_file).sub('.dat', '')
TestData.new(test_file, %w(data errors document-fragment document)).
each_with_index do |(input, errors, innerHTML, expected), index|
each_with_index do |(input, errors, inner_html, expected), index|
errors = errors.split("\n")
expected = expected.gsub("\n| ","\n")[2..-1]
$tree_types_to_test.each do |tree_name|
@ -35,8 +36,8 @@ class Html5ParserTestCase < Test::Unit::TestCase
parser = HTMLParser.new(:tree => TreeBuilders[tree_name])
if innerHTML
parser.parseFragment(input, innerHTML)
if inner_html
parser.parse_fragment(input, inner_html)
else
parser.parse(input)
end
@ -49,16 +50,15 @@ class Html5ParserTestCase < Test::Unit::TestCase
'', 'Recieved:', actual_output
].join("\n")
if $CHECK_PARSER_ERRORS
actual_errors = parser.errors.map do |(line, col), message|
'Line: %i Col: %i %s' % [line, col, message]
end
assert_equal errors.length, parser.errors.length, [
'Input', input + "\n",
'Expected errors:', errors.join("\n"),
'Actual errors:', actual_errors.join("\n")
].join("\n")
actual_errors = parser.errors.map do |(line, col), message|
'Line: %i Col: %i %s' % [line, col, message]
end
assert_equal errors.length, parser.errors.length, [
'', 'Input', input,
'', "Expected errors (#{errors.length}):", errors.join("\n"),
'', "Actual errors (#{actual_errors.length}):",
actual_errors.join("\n")
].join("\n")
end
end

View file

@ -12,17 +12,17 @@ class SanitizeTest < Test::Unit::TestCase
include HTML5
def sanitize_xhtml stream
XHTMLParser.parseFragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8'}).to_s
XHTMLParser.parse_fragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8', :lowercase_element_name => false, :lowercase_attr_name => false}).to_s
end
def sanitize_html stream
HTMLParser.parseFragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8'}).to_s
HTMLParser.parse_fragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8', :lowercase_element_name => false, :lowercase_attr_name => false}).to_s
end
def sanitize_rexml stream
require 'rexml/document'
doc = REXML::Document.new("<div xmlns='http://www.w3.org/1999/xhtml'>#{stream}</div>")
tokens = TreeWalkers.getTreeWalker('rexml').new(doc)
tokens = TreeWalkers.get_tree_walker('rexml').new(doc)
XHTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
:quote_char => "'",
:inject_meta_charset => false,
@ -39,8 +39,8 @@ class SanitizeTest < Test::Unit::TestCase
HTMLSanitizer::ALLOWED_ELEMENTS.each do |tag_name|
define_method "test_should_allow_#{tag_name}_tag" do
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
htmloutput = "<#{tag_name.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>"
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
htmloutput = "<#{tag_name.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>"
xhtmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name}>"
rexmloutput = xhtmloutput

View file

@ -12,17 +12,17 @@ class JsonWalker < HTML5::TreeWalkers::Base
@tree.each do |token|
case token[0]
when 'StartTag'
yield startTag(token[1], token[2])
yield start_tag(token[1], token[2])
when 'EndTag'
yield endTag(token[1])
yield end_tag(token[1])
when 'EmptyTag'
yield emptyTag(token[1], token[2])
yield empty_tag(token[1], token[2])
when 'Comment'
yield comment(token[1])
when 'Characters', 'SpaceCharacters'
text(token[1]) {|textToken| yield textToken}
when 'Doctype'
yield doctype(token[1])
yield doctype(token[1], token[2], token[3])
else
raise "Unknown token type: " + token[0]
end

View file

@ -0,0 +1,27 @@
require File.join(File.dirname(__FILE__), 'preamble')
require "html5/sniffer"
class TestFeedTypeSniffer < Test::Unit::TestCase
include HTML5
include TestSupport
include Sniffer
html5_test_files('sniffer').each do |test_file|
test_name = File.basename(test_file).sub('.test', '')
tests = JSON.parse(File.read(test_file))
tests.each_with_index do |data, index|
define_method('test_%s_%d' % [test_name, index + 1]) do
assert_equal data['type'], html_or_feed(data['input'])
end
end
end
# each_with_index do |t, i|
# define_method "test_#{i}" do
# assert_equal t[0], sniff_feed_type(t[1])
# end
# end
end

View file

@ -6,6 +6,33 @@ require 'tokenizer_test_parser'
class Html5TokenizerTestCase < Test::Unit::TestCase
def assert_tokens_match(expectedTokens, receivedTokens, ignoreErrorOrder, message)
if !ignoreErrorOrder
return expectedTokens == receivedTokens
else
#Sort the tokens into two groups; non-parse errors and parse errors
expected = [[],[]]
received = [[],[]]
for token in expectedTokens
if token != "ParseError"
expected[0] << token
else
expected[1] << token
end
end
for token in receivedTokens
if token != "ParseError"
received[0] << token
else
received[1] << token
end
end
assert_equal expected, received, message
end
end
def type_of?(token_name, token)
token != 'ParseError' and token_name == token.first
end
@ -38,9 +65,9 @@ class Html5TokenizerTestCase < Test::Unit::TestCase
assert_nothing_raised message do
tokenizer = HTML5::HTMLTokenizer.new(data['input'])
tokenizer.contentModelFlag = content_model_flag.to_sym
tokenizer.content_model_flag = content_model_flag.to_sym
tokenizer.currentToken = {:type => :startTag, :name => data['lastStartTag']} if data.has_key?('lastStartTag')
tokenizer.current_token = {:type => :startTag, :name => data['lastStartTag']} if data.has_key?('lastStartTag')
tokens = TokenizerTestParser.new(tokenizer).parse
@ -48,7 +75,7 @@ class Html5TokenizerTestCase < Test::Unit::TestCase
expected = concatenate_consecutive_characters(data['output'])
assert_equal expected, actual, message
assert_tokens_match expected, actual, data["ignoreErrorOrder"], message
end
end
end

View file

@ -60,7 +60,11 @@ class TestTreeWalkers < Test::Unit::TestCase
when :Comment
output << "#{' '*indent}<!-- #{token[:data]} -->"
when :Doctype
output << "#{' '*indent}<!DOCTYPE #{token[:name]}>"
if token[:name] and token[:name].any?
output << "#{' '*indent}<!DOCTYPE #{token[:name]}>"
else
output << "#{' '*indent}<!DOCTYPE >"
end
when :Characters, :SpaceCharacters
output << "#{' '*indent}\"#{token[:data]}\""
else
@ -76,7 +80,7 @@ class TestTreeWalkers < Test::Unit::TestCase
next if test_name == 'tests5' # TODO
TestData.new(test_file, %w(data errors document-fragment document)).
each_with_index do |(input, errors, innerHTML, expected), index|
each_with_index do |(input, errors, inner_html, expected), index|
expected = expected.gsub("\n| ","\n")[2..-1]
@ -86,13 +90,13 @@ class TestTreeWalkers < Test::Unit::TestCase
parser = HTML5::HTMLParser.new(:tree => tree_class[:builder])
if innerHTML
parser.parseFragment(input, innerHTML)
if inner_html
parser.parse_fragment(input, inner_html)
else
parser.parse(input)
end
document = parser.tree.getDocument
document = parser.tree.get_document
begin
output = sortattrs(convertTokens(tree_class[:walker].new(document)))

View file

@ -54,7 +54,7 @@ class TokenizerTestParser
@outputTokens.push(["Character", token[:data]])
end
def processEOF(token)
def process_eof(token)
end
def processParseError(token)