0f6889e09f
Fix Diego Restrepo's bug (see Rev 184). Update to latest HTML5lib.
95 lines
2.6 KiB
Ruby
95 lines
2.6 KiB
Ruby
require File.join(File.dirname(__FILE__), 'preamble')
|
|
|
|
require 'html5/tokenizer'
|
|
|
|
require 'tokenizer_test_parser'
|
|
|
|
class Html5TokenizerTestCase < Test::Unit::TestCase
|
|
|
|
# Asserts that the tokens produced by the tokenizer match the expected tokens.
#
# expectedTokens   - Array of expected tokens (token arrays and "ParseError" strings).
# receivedTokens   - Array of tokens actually produced.
# ignoreErrorOrder - When truthy, the position of "ParseError" entries relative
#                    to the other tokens is ignored: only the sequence of
#                    non-error tokens and the number of parse errors must agree.
#                    When falsy (including nil), both lists must be equal as-is.
# message          - Failure message passed on to assert_equal.
#
# Both modes go through assert_equal, so a mismatch fails the running test.
def assert_tokens_match(expectedTokens, receivedTokens, ignoreErrorOrder, message)
  unless ignoreErrorOrder
    # Strict mode must assert; merely returning the comparison result would
    # let every mismatch pass silently because the caller ignores it.
    return assert_equal(expectedTokens, receivedTokens, message)
  end

  # Split each list into [non-parse-error tokens, parse-error tokens] so the
  # comparison is independent of where the errors were interleaved.
  expected = expectedTokens.partition { |token| token != "ParseError" }
  received = receivedTokens.partition { |token| token != "ParseError" }

  assert_equal expected, received, message
end
|
|
|
|
# Returns true when +token+ is a token whose first element equals +token_name+.
#
# Parse errors are represented by the bare string 'ParseError' rather than by
# a token with a leading type name, so they never match any token_name.
def type_of?(token_name, token)
  token != 'ParseError' && token_name == token.first
end
|
|
|
|
# Returns a copy of +tokens+ in which the third element of every 'StartTag'
# token (a list of [name, value] pairs) is replaced by a Hash of
# name => value.
#
# When an attribute name occurs more than once, the first occurrence wins:
# the pairs are walked in reverse so earlier pairs overwrite later ones.
# All other tokens are passed through unchanged, and the input tokens are
# not modified.
def convert_attribute_arrays_to_hashes(tokens)
  tokens.map do |token|
    next token unless type_of?('StartTag', token)

    attributes = {}
    token[2].reverse.each { |name, value| attributes[name] = value }

    # Work on a copy so the caller's token keeps its original attribute list.
    converted = token.dup
    converted[2] = attributes
    converted
  end
end
|
|
|
|
# Returns a new token list in which every run of adjacent 'Character' tokens
# is collapsed into a single 'Character' token whose data (element 1) is the
# concatenation of the run's data. Other tokens, including 'ParseError'
# strings, are kept as they are and break a run.
#
# The input tokens are never modified. This matters because tokenizer_test
# calls this on data['output'] once per content model flag; merging in place
# would make the second pass see already-merged data and append the
# following characters again.
def concatenate_consecutive_characters(tokens)
  tokens.inject([]) do |merged, token|
    previous = merged.last

    if previous && type_of?('Character', token) && type_of?('Character', previous)
      # Replace the last entry with a fresh token rather than editing it,
      # since it may still be the caller's own array.
      combined = previous.dup
      combined[1] = previous[1] + token[1]
      merged[-1] = combined
    else
      merged << token
    end

    merged
  end
end
|
|
|
|
# Runs a single tokenizer test case described by +data+.
#
# The case is run once for every entry in data['contentModelFlags'], or once
# with :PCDATA when that key is absent. Keys read from +data+:
#   'description', 'input'  - used in the failure message; 'input' is also
#                             what gets tokenized
#   'lastStartTag'          - optional; when present, seeds the tokenizer's
#                             current token with a start tag of that name
#   'output'                - the expected token list
#   'ignoreErrorOrder'      - passed to assert_tokens_match
def tokenizer_test(data)
  (data['contentModelFlags'] || [:PCDATA]).each do |content_model_flag|
    # Array#* with a String joins the elements, producing a multi-line message.
    message = [
      '', 'Description:', data['description'],
      '', 'Input:', data['input'],
      '', 'Content Model Flag:', content_model_flag,
      '' ] * "\n"

    assert_nothing_raised message do
      tokenizer = HTML5::HTMLTokenizer.new(data['input'])

      # The flag may be a String or already a Symbol (the :PCDATA default),
      # so normalise it.
      tokenizer.content_model_flag = content_model_flag.to_sym

      tokenizer.current_token = {:type => :startTag, :name => data['lastStartTag']} if data.has_key?('lastStartTag')

      tokens = TokenizerTestParser.new(tokenizer).parse

      # Bring both sides to the same shape before comparing: attribute lists
      # become hashes on the actual side, and runs of character tokens are
      # merged on both sides.
      actual = concatenate_consecutive_characters(convert_attribute_arrays_to_hashes(tokens))

      expected = concatenate_consecutive_characters(data['output'])

      assert_tokens_match expected, actual, data["ignoreErrorOrder"], message
    end
  end
end
|
|
|
|
# Generate one test method per test case found in the tokenizer test files.
#
# For each path yielded by html5_test_files('tokenizer') the file is parsed
# as JSON and its 'tests' array is walked; entry N (1-based) of file
# "<name>.test" becomes the method test_<name>_<N>, which runs tokenizer_test
# on that entry.
# NOTE(review): html5_test_files and the JSON library are not defined or
# required in this file - presumably both come from 'preamble'; confirm.
html5_test_files('tokenizer').each do |test_file|
  test_name = File.basename(test_file).sub('.test', '')

  tests = JSON.parse(File.read(test_file))['tests']

  tests.each_with_index do |data, index|
    define_method('test_%s_%d' % [test_name, index + 1]) { tokenizer_test data }
  end
end
|
|
|
|
end
|
|
|