Sync with latest HTML5lib

This commit is contained in:
Jacques Distler 2007-06-22 03:12:08 -05:00
parent bf572e295f
commit 8e92e4a3ab
41 changed files with 1334 additions and 564 deletions

View file

@ -24,6 +24,7 @@ rescue LoadError
# Minimal fallback JSON reader, used when no real JSON library could be loaded.
#
# Rewrites the JSON text into a Ruby literal and evaluates it:
# * a double quote followed by optional whitespace and a colon becomes
#   the hash-rocket form ("key"=>),
# * \uXXXX escapes are replaced by the UTF-8 encoding of that code point,
# * the bare word +null+ resolves to +nil+ through a local variable.
#
# json:: String containing the JSON text. It is left unmodified, so frozen
#        strings are accepted.
#
# Returns whatever Ruby object the rewritten text evaluates to.
#
# NOTE(review): this calls +eval+ on its input, so it must only ever be fed
# trusted data such as the project's own test fixtures.
def self.parse(json)
  # Non-destructive gsub: work on a copy instead of clobbering the caller's string.
  ruby_source = json.gsub(/"\s*:/, '"=>')
  ruby_source = ruby_source.gsub(/\\u[0-9a-fA-F]{4}/) do |escape|
    # Drop the leading "\u" and turn the four hex digits into a character.
    [escape[2..-1].to_i(16)].pack('U')
  end
  # Looked up by the eval below whenever the JSON text contains `null`.
  null = nil
  eval ruby_source
end
end

View file

@ -191,13 +191,13 @@ EOX
end
def test_br
assert_xhtml_equal <<EOX
assert_xhtml_equal <<EOX1
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>XLINK</title></head>
<body>
<br/>
</body></html>
EOX
EOX1
end
def xtest_strong

View file

@ -12,7 +12,7 @@ begin
rescue LoadError
end
$CHECK_PARSER_ERRORS = ARGV.delete('-p')
$CHECK_PARSER_ERRORS = ARGV.delete('-p') # TODO
puts 'Testing tree builders: ' + $tree_types_to_test * ', '
@ -45,9 +45,9 @@ class Html5ParserTestCase < Test::Unit::TestCase
actual_output = convertTreeDump(parser.tree.testSerializer(parser.tree.document))
assert_equal sortattrs(expected_output), sortattrs(actual_output), [
'Input:', input,
'Expected:', expected_output,
'Recieved:', actual_output
'', 'Input:', input,
'', 'Expected:', expected_output,
'', 'Recieved:', actual_output
].join("\n")
if $CHECK_PARSER_ERRORS

View file

@ -30,7 +30,7 @@ class SanitizeTest < Test::Unit::TestCase
:use_trailing_solidus => true,
:omit_optional_tags => false,
:inject_meta_charset => false,
:sanitize => true}).gsub(/^<div xmlns='http:\/\/www.w3.org\/1999\/xhtml'>(.*)<\/div>$/, '\1')
:sanitize => true}).gsub(/\A<div xmlns='http:\/\/www.w3.org\/1999\/xhtml'>(.*)<\/div>\Z/m, '\1')
rescue REXML::ParseException
return "Ill-formed XHTML!"
end
@ -65,6 +65,7 @@ class SanitizeTest < Test::Unit::TestCase
elsif VOID_ELEMENTS.include?(tag_name)
htmloutput = "<#{tag_name} title='1'/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
xhtmloutput = htmloutput
htmloutput += '<br/>' if tag_name == 'br'
rexmloutput = "<#{tag_name} title='1' />"
end
check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)

View file

@ -49,6 +49,18 @@ class Html5SerializeTestcase < Test::Unit::TestCase
elsif !expected.include?(result)
flunk("Expected: #{expected.inspect}, Received: #{result.inspect}")
end
return if test_name == 'optionaltags'
result = HTML5lib::XHTMLSerializer.
serialize(JsonWalker.new(test["input"]), (test["options"] || {}))
expected = test["xhtml"] || test["expected"]
if expected.length == 1
assert_equal(expected[0], result, test["description"])
elsif !expected.include?(result)
flunk("Expected: #{expected.inspect}, Received: #{result.inspect}")
end
end
end

View file

@ -52,13 +52,11 @@ class HTMLInputStreamTest < Test::Unit::TestCase
def test_newlines
stream = HTMLInputStream.new("\xef\xbb\xbf" + "a\nbb\r\nccc\rdddd")
assert_equal(0, stream.instance_eval {@tell})
assert_equal([1,0], stream.position)
assert_equal("a\nbb\n", stream.chars_until('c'))
assert_equal(6, stream.instance_eval {@tell})
assert_equal([3,0], stream.position)
assert_equal("ccc\ndddd", stream.chars_until('x'))
assert_equal(14, stream.instance_eval {@tell})
assert_equal([4,4], stream.position)
assert_equal([0,1,5,9], stream.instance_eval {@new_lines})
assert_equal([1,2,3], stream.instance_eval {@line_lengths})
end
end

View file

@ -30,9 +30,10 @@ class Html5TokenizerTestCase < Test::Unit::TestCase
def tokenizer_test(data)
(data['contentModelFlags'] || [:PCDATA]).each do |content_model_flag|
message = [
'Description:', data['description'],
'Input:', data['input'],
'Content Model Flag:', content_model_flag ] * "\n"
'', 'Description:', data['description'],
'', 'Input:', data['input'],
'', 'Content Model Flag:', content_model_flag,
'' ] * "\n"
assert_nothing_raised message do
tokenizer = HTML5lib::HTMLTokenizer.new(data['input'])

View file

@ -11,9 +11,9 @@ $tree_types_to_test = {
'rexml' =>
{:builder => HTML5lib::TreeBuilders['rexml'],
:walker => HTML5lib::TreeWalkers['rexml']},
# 'hpricot' =>
# {:builder => HTML5lib::TreeBuilders['hpricot'],
# :walker => HTML5lib::TreeWalkers['hpricot']},
'hpricot' =>
{:builder => HTML5lib::TreeBuilders['hpricot'],
:walker => HTML5lib::TreeWalkers['hpricot']},
}
puts 'Testing tree walkers: ' + $tree_types_to_test.keys * ', '
@ -46,7 +46,7 @@ class TestTreeWalkers < Test::Unit::TestCase
output = []
indent = 0
concatenateCharacterTokens(tokens) do |token|
case token[:type]
case token[:type]
when :StartTag, :EmptyTag
output << "#{' '*indent}<#{token[:name]}>"
indent += 2
@ -65,7 +65,7 @@ class TestTreeWalkers < Test::Unit::TestCase
output << "#{' '*indent}\"#{token[:data]}\""
else
# TODO: what to do with errors?
end
end
end
return output.join("\n")
end
@ -73,6 +73,7 @@ class TestTreeWalkers < Test::Unit::TestCase
html5lib_test_files('tree-construction').each do |test_file|
test_name = File.basename(test_file).sub('.dat', '')
next if test_name == 'tests5' # TODO
File.read(test_file).split("#data\n").each_with_index do |data, index|
next if data.empty?
@ -80,12 +81,11 @@ class TestTreeWalkers < Test::Unit::TestCase
innerHTML, input, expected_output, expected_errors =
HTML5lib::TestSupport::parseTestcase(data)
rexml = $tree_types_to_test['rexml']
$tree_types_to_test.each do |tree_name, treeClass|
$tree_types_to_test.each do |tree_name, tree_class|
define_method "test_#{test_name}_#{index}_#{tree_name}" do
parser = HTML5lib::HTMLParser.new(:tree => treeClass[:builder])
parser = HTML5lib::HTMLParser.new(:tree => tree_class[:builder])
if innerHTML
parser.parseFragment(input, innerHTML)
@ -96,10 +96,13 @@ class TestTreeWalkers < Test::Unit::TestCase
document = parser.tree.getDocument
begin
output = sortattrs(convertTokens(treeClass[:walker].new(document)))
output = sortattrs(convertTokens(tree_class[:walker].new(document)))
expected = sortattrs(expected_output)
errorMsg = "\n\nExpected:\n#{expected}\nRecieved:\n#{output}\n"
assert_equal(expected, output, errorMsg)
assert_equal expected, output, [
'', 'Input:', input,
'', 'Expected:', expected,
'', 'Recieved:', output
].join("\n")
rescue NotImplementedError
# Amnesty for those that confess...
end

View file

@ -18,7 +18,8 @@ class TokenizerTestParser
end
def processDoctype(token)
@outputTokens.push(["DOCTYPE", token[:name], token[:data]])
@outputTokens.push(["DOCTYPE", token[:name], token[:publicId],
token[:systemId], token[:correct]])
end
def processStartTag(token)