Sync with latest HTML5lib and latest Maruku

This commit is contained in:
Jacques Distler 2007-07-04 17:36:59 -05:00
parent 8e92e4a3ab
commit 8ccaad85a5
71 changed files with 1974 additions and 1621 deletions

View file

@ -1,81 +1,81 @@
require 'test/unit'
HTML5LIB_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__))))
if File.exists?(File.join(HTML5LIB_BASE, 'testdata'))
TESTDATA_DIR = File.join(HTML5LIB_BASE, 'testdata')
else
TESTDATA_DIR = File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), 'testdata')
end
$:.unshift File.join(File.dirname(File.dirname(__FILE__)),'lib')
$:.unshift File.dirname(__FILE__)
def html5lib_test_files(subdirectory)
Dir[File.join(TESTDATA_DIR, subdirectory, '*.*')]
end
begin
require 'rubygems'
require 'json'
rescue LoadError
class JSON
def self.parse json
json.gsub!(/"\s*:/, '"=>')
json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
null = nil
eval json
end
end
end
module HTML5lib
module TestSupport
def self.startswith?(a, b)
b[0... a.length] == a
end
def self.parseTestcase(data)
innerHTML = nil
input = []
output = []
errors = []
currentList = input
data.split(/\n/).each do |line|
if !line.empty? and !startswith?("#errors", line) and
!startswith?("#document", line) and
!startswith?("#data", line) and
!startswith?("#document-fragment", line)
if currentList == output and startswith?("|", line)
currentList.push(line[2..-1])
else
currentList.push(line)
end
elsif line == "#errors"
currentList = errors
elsif line == "#document" or startswith?("#document-fragment", line)
if startswith?("#document-fragment", line)
innerHTML = line[19..-1]
raise AssertionError unless innerHTML
end
currentList = output
end
end
return innerHTML, input.join("\n"), output.join("\n"), errors
end
# convert the output of str(document) to the format used in the testcases
def convertTreeDump(treedump)
treedump.split(/\n/)[1..-1].map { |line| (line.length > 2 and line[0] == ?|) ? line[3..-1] : line }.join("\n")
end
def sortattrs(output)
output.gsub(/^(\s+)\w+=.*(\n\1\w+=.*)+/) do |match|
match.split("\n").sort.join("\n")
end
end
end
end
require 'test/unit'
HTML5_BASE = File.dirname(File.dirname(File.dirname(File.expand_path(__FILE__))))
if File.exists?(File.join(HTML5_BASE, 'testdata'))
TESTDATA_DIR = File.join(HTML5_BASE, 'testdata')
else
TESTDATA_DIR = File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), 'testdata')
end
$:.unshift File.join(File.dirname(File.dirname(__FILE__)),'lib')
$:.unshift File.dirname(__FILE__)
def html5_test_files(subdirectory)
Dir[File.join(TESTDATA_DIR, subdirectory, '*.*')]
end
begin
require 'rubygems'
require 'json'
rescue LoadError
class JSON
def self.parse json
json.gsub!(/"\s*:/, '"=>')
json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
null = nil
eval json
end
end
end
module HTML5
module TestSupport
# convert the output of str(document) to the format used in the testcases
def convertTreeDump(treedump)
treedump.split(/\n/)[1..-1].map { |line| (line.length > 2 and line[0] == ?|) ? line[3..-1] : line }.join("\n")
end
def sortattrs(output)
output.gsub(/^(\s+)\w+=.*(\n\1\w+=.*)+/) do |match|
match.split("\n").sort.join("\n")
end
end
class TestData
include Enumerable
def initialize(filename, sections)
@f = open(filename)
@sections = sections
end
def each
data = {}
key=nil
@f.each_line do |line|
if line[0] == ?# and @sections.include?(line[1..-2])
heading = line[1..-2]
if data.any? and heading == @sections[0]
data[key].chomp! #Remove trailing newline
yield normaliseOutput(data)
data = {}
end
key = heading
data[key]=""
elsif key
data[key] += line
end
end
yield normaliseOutput(data) if data
end
def normaliseOutput(data)
#Remove trailing newlines
data.keys.each { |key| data[key].chomp! }
@sections.map {|heading| data[heading]}
end
end
end
end

View file

@ -1,8 +1,10 @@
require File.join(File.dirname(__FILE__), 'preamble')
require 'html5lib/inputstream'
require 'html5/inputstream'
class Html5EncodingTestCase < Test::Unit::TestCase
include HTML5
include TestSupport
begin
require 'rubygems'
@ -10,23 +12,21 @@ class Html5EncodingTestCase < Test::Unit::TestCase
def test_chardet
file = File.open(File.join(TESTDATA_DIR, 'encoding', 'chardet', 'test_big5.txt'), 'r')
stream = HTML5lib::HTMLInputStream.new(file, :chardet => true)
stream = HTML5::HTMLInputStream.new(file, :chardet => true)
assert_equal 'big5', stream.char_encoding.downcase
rescue LoadError
puts "chardet not found, skipping chardet tests"
end
end
html5lib_test_files('encoding').each do |test_file|
html5_test_files('encoding').each do |test_file|
test_name = File.basename(test_file).sub('.dat', '').tr('-', '')
File.read(test_file).split("#data\n").each_with_index do |data, index|
next if data.empty?
input, encoding = data.split(/\n#encoding\s+/, 2)
encoding = encoding.split[0]
TestData.new(test_file, %w(data encoding)).
each_with_index do |(input, encoding), index|
define_method 'test_%s_%d' % [ test_name, index + 1 ] do
stream = HTML5lib::HTMLInputStream.new(input, :chardet => false)
stream = HTML5::HTMLInputStream.new(input, :chardet => false)
assert_equal encoding.downcase, stream.char_encoding.downcase, input
end
end

View file

@ -1,23 +1,23 @@
require File.join(File.dirname(__FILE__), 'preamble')
require 'html5lib/liberalxmlparser'
require 'html5/liberalxmlparser'
XMLELEM = /<(\w+\s*)((?:[-:\w]+="[^"]*"\s*)+)(\/?)>/
SORTATTRS = '<#{$1+$2.split.sort.join(' ')+$3}>'
def assert_xml_equal(input, expected=nil, parser=HTML5lib::XMLParser)
def assert_xml_equal(input, expected=nil, parser=HTML5::XMLParser)
sortattrs = proc {"<#{$1+$2.split.sort.join(' ')+$3}>"}
document = parser.parse(input.chomp).root
if not expected
expected = input.chomp.gsub(XMLELEM,SORTATTRS)
expected = input.chomp.gsub(XMLELEM,&sortattrs)
expected = expected.gsub(/&#(\d+);/) {[$1.to_i].pack('U')}
output = document.to_s.gsub(/'/,'"').gsub(XMLELEM,SORTATTRS)
output = document.to_s.gsub(/'/,'"').gsub(XMLELEM,&sortattrs)
assert_equal(expected, output)
else
assert_equal(expected, document.to_s.gsub(/'/,'"'))
end
end
def assert_xhtml_equal(input, expected=nil, parser=HTML5lib::XHTMLParser)
def assert_xhtml_equal(input, expected=nil, parser=HTML5::XHTMLParser)
assert_xml_equal(input, expected, parser)
end
@ -34,10 +34,10 @@ class BasicXhtml5Test < Test::Unit::TestCase
def test_title_body_named_charref
assert_xhtml_equal(
'<title>mdash</title>A &mdash B',
'<title>ntilde</title>A &ntilde B',
'<html xmlns="http://www.w3.org/1999/xhtml">' +
'<head><title>mdash</title></head>' +
'<body>A '+ [0x2014].pack('U') + ' B</body>' +
'<head><title>ntilde</title></head>' +
'<body>A '+ [0xF1].pack('U') + ' B</body>' +
'</html>')
end
end
@ -193,20 +193,71 @@ EOX
def test_br
assert_xhtml_equal <<EOX1
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>XLINK</title></head>
<head><title>BR</title></head>
<body>
<br/>
</body></html>
EOX1
end
def xtest_strong
def test_strong
assert_xhtml_equal <<EOX
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>XLINK</title></head>
<head><title>STRONG</title></head>
<body>
<strong></strong>
</body></html>
EOX
end
def test_script
assert_xhtml_equal <<EOX
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>SCRIPT</title></head>
<body>
<script>1 &lt; 2 &amp; 3</script>
</body></html>
EOX
end
def test_script_src
assert_xhtml_equal <<EOX1, <<EOX2.strip
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>SCRIPT</title><script src="http://example.com"/></head>
<body>
<script>1 &lt; 2 &amp; 3</script>
</body></html>
EOX1
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>SCRIPT</title><script src="http://example.com"></script></head>
<body>
<script>1 &lt; 2 &amp; 3</script>
</body></html>
EOX2
end
def test_title
assert_xhtml_equal <<EOX
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>1 &lt; 2 &amp; 3</title></head>
<body>
</body></html>
EOX
end
def test_prolog
assert_xhtml_equal <<EOX1, <<EOX2.strip
<?xml version="1.0" encoding="UTF-8" ?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>PROLOG</title></head>
<body>
</body></html>
EOX1
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>PROLOG</title></head>
<body>
</body></html>
EOX2
end
end

View file

@ -1,7 +1,7 @@
require File.join(File.dirname(__FILE__), 'preamble')
require 'html5lib/treebuilders'
require 'html5lib/html5parser'
require 'html5/treebuilders'
require 'html5/html5parser'
$tree_types_to_test = ['simpletree', 'rexml']
@ -18,18 +18,17 @@ puts 'Testing tree builders: ' + $tree_types_to_test * ', '
class Html5ParserTestCase < Test::Unit::TestCase
include HTML5lib
include HTML5
include TestSupport
html5lib_test_files('tree-construction').each do |test_file|
html5_test_files('tree-construction').each do |test_file|
test_name = File.basename(test_file).sub('.dat', '')
File.read(test_file).split("#data\n").each_with_index do |data, index|
next if data.empty?
innerHTML, input, expected_output, expected_errors =
TestSupport.parseTestcase(data)
TestData.new(test_file, %w(data errors document-fragment document)).
each_with_index do |(input, errors, innerHTML, expected), index|
expected = expected.gsub("\n| ","\n")[2..-1]
$tree_types_to_test.each do |tree_name|
define_method 'test_%s_%d_%s' % [ test_name, index + 1, tree_name ] do
@ -44,9 +43,9 @@ class Html5ParserTestCase < Test::Unit::TestCase
actual_output = convertTreeDump(parser.tree.testSerializer(parser.tree.document))
assert_equal sortattrs(expected_output), sortattrs(actual_output), [
assert_equal sortattrs(expected), sortattrs(actual_output), [
'', 'Input:', input,
'', 'Expected:', expected_output,
'', 'Expected:', expected,
'', 'Recieved:', actual_output
].join("\n")
@ -54,9 +53,9 @@ class Html5ParserTestCase < Test::Unit::TestCase
actual_errors = parser.errors.map do |(line, col), message|
'Line: %i Col: %i %s' % [line, col, message]
end
assert_equal expected_errors.length, parser.errors.length, [
assert_equal errors.length, parser.errors.length, [
'Input', input + "\n",
'Expected errors:', expected_errors.join("\n"),
'Expected errors:', errors.join("\n"),
'Actual errors:', actual_errors.join("\n")
].join("\n")
end

View file

@ -2,14 +2,14 @@
require File.join(File.dirname(__FILE__), 'preamble')
require 'html5lib/html5parser'
require 'html5lib/liberalxmlparser'
require 'html5lib/treewalkers'
require 'html5lib/serializer'
require 'html5lib/sanitizer'
require 'html5/html5parser'
require 'html5/liberalxmlparser'
require 'html5/treewalkers'
require 'html5/serializer'
require 'html5/sanitizer'
class SanitizeTest < Test::Unit::TestCase
include HTML5lib
include HTML5
def sanitize_xhtml stream
XHTMLParser.parseFragment(stream, {:tokenizer => HTMLSanitizer, :encoding => 'utf-8'}).to_s
@ -131,7 +131,7 @@ class SanitizeTest < Test::Unit::TestCase
# check_sanitization(input, output, output, output)
# end
html5lib_test_files('sanitizer').each do |filename|
html5_test_files('sanitizer').each do |filename|
JSON::parse(open(filename).read).each do |test|
define_method "test_#{test['name']}" do
check_sanitization(

View file

@ -1,13 +1,13 @@
require File.join(File.dirname(__FILE__), 'preamble')
require 'html5lib/html5parser'
require 'html5lib/serializer'
require 'html5lib/treewalkers'
require 'html5/html5parser'
require 'html5/serializer'
require 'html5/treewalkers'
#Run the serialize error checks
checkSerializeErrors = false
class JsonWalker < HTML5lib::TreeWalkers::Base
class JsonWalker < HTML5::TreeWalkers::Base
def each
@tree.each do |token|
case token[0]
@ -31,7 +31,7 @@ class JsonWalker < HTML5lib::TreeWalkers::Base
end
class Html5SerializeTestcase < Test::Unit::TestCase
html5lib_test_files('serializer').each do |filename|
html5_test_files('serializer').each do |filename|
test_name = File.basename(filename).sub('.test', '')
tests = JSON::parse(open(filename).read)
tests['tests'].each_with_index do |test, index|
@ -41,7 +41,7 @@ class Html5SerializeTestcase < Test::Unit::TestCase
test["options"][:encoding] = test["options"]["encoding"]
end
result = HTML5lib::HTMLSerializer.
result = HTML5::HTMLSerializer.
serialize(JsonWalker.new(test["input"]), (test["options"] || {}))
expected = test["expected"]
if expected.length == 1
@ -52,7 +52,7 @@ class Html5SerializeTestcase < Test::Unit::TestCase
return if test_name == 'optionaltags'
result = HTML5lib::XHTMLSerializer.
result = HTML5::XHTMLSerializer.
serialize(JsonWalker.new(test["input"]), (test["options"] || {}))
expected = test["xhtml"] || test["expected"]
if expected.length == 1

View file

@ -1,9 +1,9 @@
require File.join(File.dirname(__FILE__), 'preamble')
require 'html5lib/inputstream'
require 'html5/inputstream'
class HTMLInputStreamTest < Test::Unit::TestCase
include HTML5lib
include HTML5
def test_char_ascii
stream = HTMLInputStream.new("'", :encoding=>'ascii')

View file

@ -1,6 +1,6 @@
require File.join(File.dirname(__FILE__), 'preamble')
require 'html5lib/tokenizer'
require 'html5/tokenizer'
require 'tokenizer_test_parser'
@ -36,7 +36,7 @@ class Html5TokenizerTestCase < Test::Unit::TestCase
'' ] * "\n"
assert_nothing_raised message do
tokenizer = HTML5lib::HTMLTokenizer.new(data['input'])
tokenizer = HTML5::HTMLTokenizer.new(data['input'])
tokenizer.contentModelFlag = content_model_flag.to_sym
@ -53,7 +53,7 @@ class Html5TokenizerTestCase < Test::Unit::TestCase
end
end
html5lib_test_files('tokenizer').each do |test_file|
html5_test_files('tokenizer').each do |test_file|
test_name = File.basename(test_file).sub('.test', '')
tests = JSON.parse(File.read(test_file))['tests']

View file

@ -1,25 +1,25 @@
require File.join(File.dirname(__FILE__), 'preamble')
require 'html5lib/html5parser'
require 'html5lib/treewalkers'
require 'html5lib/treebuilders'
require 'html5/html5parser'
require 'html5/treewalkers'
require 'html5/treebuilders'
$tree_types_to_test = {
'simpletree' =>
{:builder => HTML5lib::TreeBuilders['simpletree'],
:walker => HTML5lib::TreeWalkers['simpletree']},
{:builder => HTML5::TreeBuilders['simpletree'],
:walker => HTML5::TreeWalkers['simpletree']},
'rexml' =>
{:builder => HTML5lib::TreeBuilders['rexml'],
:walker => HTML5lib::TreeWalkers['rexml']},
{:builder => HTML5::TreeBuilders['rexml'],
:walker => HTML5::TreeWalkers['rexml']},
'hpricot' =>
{:builder => HTML5lib::TreeBuilders['hpricot'],
:walker => HTML5lib::TreeWalkers['hpricot']},
{:builder => HTML5::TreeBuilders['hpricot'],
:walker => HTML5::TreeWalkers['hpricot']},
}
puts 'Testing tree walkers: ' + $tree_types_to_test.keys * ', '
class TestTreeWalkers < Test::Unit::TestCase
include HTML5lib::TestSupport
include HTML5::TestSupport
def concatenateCharacterTokens(tokens)
charactersToken = nil
@ -70,22 +70,21 @@ class TestTreeWalkers < Test::Unit::TestCase
return output.join("\n")
end
html5lib_test_files('tree-construction').each do |test_file|
html5_test_files('tree-construction').each do |test_file|
test_name = File.basename(test_file).sub('.dat', '')
next if test_name == 'tests5' # TODO
File.read(test_file).split("#data\n").each_with_index do |data, index|
next if data.empty?
TestData.new(test_file, %w(data errors document-fragment document)).
each_with_index do |(input, errors, innerHTML, expected), index|
innerHTML, input, expected_output, expected_errors =
HTML5lib::TestSupport::parseTestcase(data)
expected = expected.gsub("\n| ","\n")[2..-1]
$tree_types_to_test.each do |tree_name, tree_class|
define_method "test_#{test_name}_#{index}_#{tree_name}" do
parser = HTML5lib::HTMLParser.new(:tree => tree_class[:builder])
parser = HTML5::HTMLParser.new(:tree => tree_class[:builder])
if innerHTML
parser.parseFragment(input, innerHTML)
@ -97,7 +96,7 @@ class TestTreeWalkers < Test::Unit::TestCase
begin
output = sortattrs(convertTokens(tree_class[:walker].new(document)))
expected = sortattrs(expected_output)
expected = sortattrs(expected)
assert_equal expected, output, [
'', 'Input:', input,
'', 'Expected:', expected,

View file

@ -1,63 +1,63 @@
require 'html5lib/constants'
class TokenizerTestParser
def initialize(tokenizer)
@tokenizer = tokenizer
end
def parse
@outputTokens = []
debug = nil
for token in @tokenizer
debug = token.inspect if token[:type] == :ParseError
send(('process' + token[:type].to_s), token)
end
return @outputTokens
end
def processDoctype(token)
@outputTokens.push(["DOCTYPE", token[:name], token[:publicId],
token[:systemId], token[:correct]])
end
def processStartTag(token)
@outputTokens.push(["StartTag", token[:name], token[:data]])
end
def processEmptyTag(token)
if not HTML5lib::VOID_ELEMENTS.include? token[:name]
@outputTokens.push("ParseError")
end
@outputTokens.push(["StartTag", token[:name], token[:data]])
end
def processEndTag(token)
if token[:data].length > 0
self.processParseError(token)
end
@outputTokens.push(["EndTag", token[:name]])
end
def processComment(token)
@outputTokens.push(["Comment", token[:data]])
end
def processCharacters(token)
@outputTokens.push(["Character", token[:data]])
end
alias processSpaceCharacters processCharacters
def processCharacters(token)
@outputTokens.push(["Character", token[:data]])
end
def processEOF(token)
end
def processParseError(token)
@outputTokens.push("ParseError")
end
end
require 'html5/constants'
class TokenizerTestParser
def initialize(tokenizer)
@tokenizer = tokenizer
end
def parse
@outputTokens = []
debug = nil
for token in @tokenizer
debug = token.inspect if token[:type] == :ParseError
send(('process' + token[:type].to_s), token)
end
return @outputTokens
end
def processDoctype(token)
@outputTokens.push(["DOCTYPE", token[:name], token[:publicId],
token[:systemId], token[:correct]])
end
def processStartTag(token)
@outputTokens.push(["StartTag", token[:name], token[:data]])
end
def processEmptyTag(token)
if not HTML5::VOID_ELEMENTS.include? token[:name]
@outputTokens.push("ParseError")
end
@outputTokens.push(["StartTag", token[:name], token[:data]])
end
def processEndTag(token)
if token[:data].length > 0
self.processParseError(token)
end
@outputTokens.push(["EndTag", token[:name]])
end
def processComment(token)
@outputTokens.push(["Comment", token[:data]])
end
def processCharacters(token)
@outputTokens.push(["Character", token[:data]])
end
alias processSpaceCharacters processCharacters
def processCharacters(token)
@outputTokens.push(["Character", token[:data]])
end
def processEOF(token)
end
def processParseError(token)
@outputTokens.push("ParseError")
end
end