a6429f8c22
Completely removed the html5lib sanitizer. Fixed the string-handling to work in both Ruby 1.8.x and 1.9.2. There are still, inexplicably, two functional tests that fail. But the rest seems to work quite well.
135 lines
4.1 KiB
Ruby
135 lines
4.1 KiB
Ruby
require File.join(File.dirname(__FILE__), 'preamble')
|
|
|
|
require 'html5/html5parser'
|
|
require 'html5/treewalkers'
|
|
require 'html5/treebuilders'
|
|
|
|
# Table of tree implementations exercised by the tests below, keyed by tree
# name. Each entry pairs the builder and the walker that the HTML5 registries
# return for that name.
$tree_types_to_test = {}
%w(simpletree rexml hpricot).each do |tree_name|
  $tree_types_to_test[tree_name] = {
    :builder => HTML5::TreeBuilders[tree_name],
    :walker  => HTML5::TreeWalkers[tree_name]
  }
end

# Announce which tree types this run covers.
puts "Testing tree walkers: #{$tree_types_to_test.keys.join(', ')}"
|
|
|
|
class TestTreeWalkers < Test::Unit::TestCase
|
|
include HTML5::TestSupport
|
|
|
|
# Collapses every run of adjacent :Characters / :SpaceCharacters tokens into
# a single :Characters token and yields the resulting stream in order.
#
# tokens - an object responding to #each that yields token hashes; each hash
#          is read through its :type key and, for character tokens, :data.
#
# Yields every non-character token unchanged and, for each run of character
# tokens, one newly built {:type => :Characters, :data => ...} hash whose
# :data is the concatenation of the run. The input hashes and their :data
# strings are not modified.
#
# Called without a block, returns an enumerator over the same sequence
# (previously this raised LocalJumpError).
def concatenateCharacterTokens(tokens)
  return enum_for(:concatenateCharacterTokens, tokens) unless block_given?

  pending = nil
  tokens.each do |token|
    if [:Characters, :SpaceCharacters].include?(token[:type])
      if pending
        # += builds a new string, so the first token's :data stays untouched
        pending[:data] += token[:data]
      else
        pending = {:type => :Characters, :data => token[:data]}
      end
    else
      # A non-character token ends the current run: flush it first.
      if pending
        yield pending
        pending = nil
      end
      yield token
    end
  end

  # Flush a run that reaches the end of the stream.
  yield pending if pending
end
|
|
|
|
# Renders a token stream as an indented, line-oriented text dump.
#
# tokens - a token stream accepted by concatenateCharacterTokens; adjacent
#          character tokens are merged by it before rendering.
#
# Output lines, each prefixed by the current indentation:
#   :StartTag / :EmptyTag  "<name>", then one name="value" line per
#                          attribute (sorted, 'xmlns' skipped) indented two
#                          further spaces; :StartTag keeps the deeper indent
#                          for following tokens, :EmptyTag restores it.
#   :EndTag                emits nothing, decreases the indent by two.
#   :Comment               "<!-- data -->"
#   :Doctype               "<!DOCTYPE name>", or "<!DOCTYPE >" when the name
#                          is nil or empty.
#   :Characters            the text wrapped in double quotes.
# Tokens of any other type are ignored.
#
# Returns the lines joined with "\n".
def convertTokens(tokens)
  output = []
  indent = 0

  concatenateCharacterTokens(tokens) do |token|
    case token[:type]
    when :StartTag, :EmptyTag
      output << "#{' ' * indent}<#{token[:name]}>"
      indent += 2
      token[:data].to_a.sort.each do |name, value|
        next if name == 'xmlns'
        output << "#{' ' * indent}#{name}=\"#{value}\""
      end
      indent -= 2 if token[:type] == :EmptyTag
    when :EndTag
      indent -= 2
    when :Comment
      output << "#{' ' * indent}<!-- #{token[:data]} -->"
    when :Doctype
      doctype_name = token[:name]
      # String#any? exists only on Ruby 1.8 (String was Enumerable there);
      # !empty? gives the same answer and also works on 1.9 and later.
      if doctype_name && !doctype_name.empty?
        output << "#{' ' * indent}<!DOCTYPE #{doctype_name}>"
      else
        output << "#{' ' * indent}<!DOCTYPE >"
      end
    when :Characters, :SpaceCharacters
      output << "#{' ' * indent}\"#{token[:data]}\""
    end
  end

  output.join("\n")
end
|
|
|
|
# Defines one test method per (tree-construction test case, tree type) pair.
# Each generated method parses the case's input with the tree type's builder,
# walks the resulting document with the matching walker, and compares the
# rendered dump with the expected tree from the .dat file.
html5_test_files('tree-construction').each do |test_file|
  test_name = File.basename(test_file).sub('.dat', '')
  next if test_name == 'tests5' # TODO

  TestData.new(test_file, %w(data errors document-fragment document)).
    each_with_index do |(input, errors, inner_html, expected), index|

    # Remove the "| " that follows each newline and drop the first two
    # characters (presumably the "| " prefix of the first line -- confirm
    # against the .dat format).
    expected = expected.gsub("\n| ", "\n")[2..-1]

    $tree_types_to_test.each do |tree_name, tree_class|
      define_method "test_#{test_name}_#{index}_#{tree_name}" do
        parser = HTML5::HTMLParser.new(:tree => tree_class[:builder])

        # A document-fragment section switches the case to fragment parsing.
        if inner_html
          parser.parse_fragment(input, inner_html)
        else
          parser.parse(input)
        end

        document = parser.tree.get_document

        begin
          output = sortattrs(convertTokens(tree_class[:walker].new(document)))
          # Use a method-local variable: `expected` is captured by the
          # generated methods of every tree type and must not be reassigned.
          expected_tree = sortattrs(expected)
          assert_equal expected_tree, output, [
            '', 'Input:', input,
            '', 'Expected:', expected_tree,
            '', 'Received:', output
          ].join("\n")
        rescue NotImplementedError
          # Amnesty for those that confess...
        end
      end
    end
  end
end
|
|
|
|
# Parses "<html></html>" with every configured tree type and checks, token by
# token, that the walker output matches the html/head/body start and end tags
# listed below (each carrying an empty attribute list).
def test_all_tokens
  start_tag = lambda { |name| {:data => [], :type => :StartTag, :name => name} }
  end_tag   = lambda { |name| {:data => [], :type => :EndTag, :name => name} }

  expected_tokens = [
    start_tag.call('html'),
    start_tag.call('head'),
    end_tag.call('head'),
    start_tag.call('body'),
    end_tag.call('body'),
    end_tag.call('html')
  ]

  $tree_types_to_test.each_value do |tree_class|
    parser = HTML5::HTMLParser.new(:tree => tree_class[:builder])
    document = parser.parse("<html></html>")
    # document = tree_class.get(:adapter)(document)
    walker = tree_class[:walker].new(document)

    # zip pairs each expected token with the walker token at the same position
    expected_tokens.zip(walker) do |expected_token, output_token|
      assert_equal(expected_token, output_token)
    end
  end
end
|
|
|
|
|
|
end
|