a6429f8c22
Completely removed the html5lib sanitizer. Fixed the string-handling to work in both Ruby 1.8.x and 1.9.2. There are still, inexplicably, two functional tests that fail. But the rest seems to work quite well.
95 lines
2.6 KiB
Ruby
95 lines
2.6 KiB
Ruby
require File.join(File.dirname(__FILE__), 'preamble')
|
|
|
|
require 'html5/tokenizer'
|
|
|
|
require 'tokenizer_test_parser'
|
|
|
|
# Data-driven tests for HTML5::HTMLTokenizer.
#
# One test method is generated per entry of every tokenizer test file
# returned by html5_test_files('tokenizer'). Each entry is a Hash read from
# JSON; the keys this class reads are 'description', 'input', 'output',
# 'contentModelFlags', 'lastStartTag' and 'ignoreErrorOrder'.
class Html5TokenizerTestCase < Test::Unit::TestCase

  # Asserts that the received token stream matches the expected one.
  #
  # expectedTokens   - Array of expected tokens.
  # receivedTokens   - Array of tokens produced by the tokenizer.
  # ignoreErrorOrder - when truthy, "ParseError" entries only have to agree
  #                    in number, not in position relative to other tokens.
  # message          - failure message passed to assert_equal.
  #
  # Both modes go through assert_equal, so a mismatch always fails the test.
  # (Returning the result of == instead would be silently discarded by the
  # caller and the strict-order comparison would never be able to fail.)
  def assert_tokens_match(expectedTokens, receivedTokens, ignoreErrorOrder, message)
    if ignoreErrorOrder
      expectedTokens = partition_parse_errors(expectedTokens)
      receivedTokens = partition_parse_errors(receivedTokens)
    end

    assert_equal expectedTokens, receivedTokens, message
  end

  # Splits a token list into [non_parse_error_tokens, parse_error_tokens],
  # keeping the relative order inside each group.
  def partition_parse_errors(tokens)
    tokens.partition { |token| token != 'ParseError' }
  end

  # True when +token+ is not the bare "ParseError" marker and its first
  # element equals +token_name+. The marker check comes first because
  # +first+ is only called on non-marker tokens.
  def type_of?(token_name, token)
    token != 'ParseError' && token_name == token.first
  end

  # Replaces the attribute list (element 2) of every 'StartTag' token with a
  # Hash built from it, and returns a new Array holding the same token
  # objects. Note that the StartTag tokens themselves are modified in place.
  #
  # The list is reversed before the Hash is built so that, when a name is
  # repeated, the pair that came first in the list is the one that is kept.
  # NOTE(review): assumes element 2 is an Array of [name, value] pairs whose
  # members are not themselves Arrays (flatten would break them up) - confirm
  # against TokenizerTestParser.
  def convert_attribute_arrays_to_hashes(tokens)
    tokens.map do |token|
      token[2] = Hash[*token[2].reverse.flatten] if type_of?('StartTag', token)
      token
    end
  end

  # Returns a new Array in which every run of adjacent 'Character' tokens has
  # been merged into a single token whose data (element 1) is the
  # concatenation of the run's data.
  #
  # The merged token is a copy: the tokens passed in are never modified.
  # tokenizer_test calls this on data['output'] once per content model flag,
  # so modifying the caller's tokens would make the expected data grow on
  # every iteration after the first.
  def concatenate_consecutive_characters(tokens)
    tokens.inject([]) do |merged, token|
      previous = merged.last

      if previous && type_of?('Character', token) && type_of?('Character', previous)
        combined = previous.dup
        combined[1] = previous[1] + token[1]
        merged[-1] = combined
      else
        merged << token
      end

      merged
    end
  end

  # Runs one test entry against the tokenizer, once for every content model
  # flag listed under 'contentModelFlags' (or once with :PCDATA when the key
  # is absent or nil).
  #
  # For each flag it builds a tokenizer over data['input'], sets the flag,
  # seeds current_token with a start tag named data['lastStartTag'] when that
  # key is present, collects the tokens through TokenizerTestParser, and
  # compares them with data['output'] after normalising both sides.
  def tokenizer_test(data)
    content_model_flags = data['contentModelFlags'] || [:PCDATA]

    content_model_flags.each do |content_model_flag|
      message = [
        '', 'Description:', data['description'],
        '', 'Input:', data['input'],
        '', 'Content Model Flag:', content_model_flag,
        '' ] * "\n"

      assert_nothing_raised message do
        tokenizer = HTML5::HTMLTokenizer.new(data['input'])
        tokenizer.content_model_flag = content_model_flag.to_sym

        if data.has_key?('lastStartTag')
          tokenizer.current_token = {:type => :startTag, :name => data['lastStartTag']}
        end

        tokens   = TokenizerTestParser.new(tokenizer).parse
        actual   = concatenate_consecutive_characters(convert_attribute_arrays_to_hashes(tokens))
        expected = concatenate_consecutive_characters(data['output'])

        assert_tokens_match expected, actual, data["ignoreErrorOrder"], message
      end
    end
  end

  # Define test_<file>_<n> for the n-th (1-based) entry under 'tests' in each
  # tokenizer test file; <file> is the file's basename with '.test' removed.
  html5_test_files('tokenizer').each do |test_file|
    test_name = File.basename(test_file).sub('.test', '')

    tests = JSON.parse(File.read(test_file))['tests']

    tests.each_with_index do |data, index|
      define_method('test_%s_%d' % [test_name, index + 1]) { tokenizer_test data }
    end
  end

end
|
|
|