Ruby 1.9 Compatibility

Completely removed the html5lib sanitizer.
Fixed the string-handling to work in both
Ruby 1.8.x and 1.9.2. There are still,
inexplicably, two functional tests that
fail. But the rest seems to work quite well.
This commit is contained in:
Jacques Distler 2009-11-30 16:28:18 -06:00
parent 79c8572053
commit a6429f8c22
142 changed files with 519 additions and 843 deletions

View file

@ -0,0 +1,94 @@
require File.join(File.dirname(__FILE__), 'preamble')
require 'html5/tokenizer'
require 'tokenizer_test_parser'
# Data-driven tokenizer tests: one test method is generated for every entry
# in the JSON files returned by html5_test_files('tokenizer').
#
# Within this file a token is either the string "ParseError" or an array
# whose first element is the token type name (e.g. 'Character', 'StartTag').
class Html5TokenizerTestCase < Test::Unit::TestCase
  # Asserts that the received token stream matches the expected one.
  #
  # expectedTokens   - tokens taken from the test data
  # receivedTokens   - tokens produced by the tokenizer
  # ignoreErrorOrder - when truthy, the position of "ParseError" markers
  #                    relative to the other tokens is not compared; only
  #                    the ordered non-error tokens and the number of
  #                    parse errors must agree
  # message          - failure message passed on to assert_equal
  def assert_tokens_match(expectedTokens, receivedTokens, ignoreErrorOrder, message)
    if ignoreErrorOrder
      # partition yields [non-parse-errors, parse-errors]
      expected = expectedTokens.partition { |token| token != "ParseError" }
      received = receivedTokens.partition { |token| token != "ParseError" }
      assert_equal expected, received, message
    else
      # This must be an assertion: merely returning the result of the
      # comparison lets every strict-order test pass unconditionally.
      assert_equal expectedTokens, receivedTokens, message
    end
  end

  # True when +token+ is not a parse-error marker and its first element
  # equals +token_name+.
  def type_of?(token_name, token)
    token != 'ParseError' && token_name == token.first
  end

  # Replaces the third element of every 'StartTag' token (an array of
  # attribute pairs) with a Hash. The pairs are reversed before the Hash
  # is built, so when a name occurs more than once the first occurrence
  # wins. The tokens are modified in place; a new array holding the same
  # token objects is returned.
  def convert_attribute_arrays_to_hashes(tokens)
    tokens.map do |token|
      token[2] = Hash[*token[2].reverse.flatten] if type_of?('StartTag', token)
      token
    end
  end

  # Returns a new token list in which each run of adjacent 'Character'
  # tokens is collapsed into one token carrying the concatenated data.
  #
  # The input tokens are never modified: tokenizer_test passes the same
  # data['output'] once per content model flag, and merging in place
  # would make the expected character data grow on every pass.
  def concatenate_consecutive_characters(tokens)
    tokens.inject([]) do |merged, token|
      if type_of?('Character', token) && merged.any? && type_of?('Character', merged.last)
        # Merge into a copy so the caller's token array stays untouched.
        combined = merged.last.dup
        combined[1] = combined[1] + token[1]
        merged[-1] = combined
      else
        merged << token
      end
      merged
    end
  end

  # Runs one test-data entry, once for every content model flag it lists
  # (or once with :PCDATA when it lists none), and compares the tokenizer
  # output against data['output'].
  def tokenizer_test(data)
    (data['contentModelFlags'] || [:PCDATA]).each do |content_model_flag|
      message = [
        '', 'Description:', data['description'],
        '', 'Input:', data['input'],
        '', 'Content Model Flag:', content_model_flag,
        '' ].join("\n")

      assert_nothing_raised message do
        tokenizer = HTML5::HTMLTokenizer.new(data['input'])
        tokenizer.content_model_flag = content_model_flag.to_sym

        if data.has_key?('lastStartTag')
          tokenizer.current_token = {:type => :startTag, :name => data['lastStartTag']}
        end

        tokens   = TokenizerTestParser.new(tokenizer).parse
        actual   = concatenate_consecutive_characters(convert_attribute_arrays_to_hashes(tokens))
        expected = concatenate_consecutive_characters(data['output'])

        assert_tokens_match expected, actual, data["ignoreErrorOrder"], message
      end
    end
  end

  # Generate a test_<file>_<n> method for every test in every tokenizer
  # test file; <n> is the 1-based position of the test within its file.
  html5_test_files('tokenizer').each do |test_file|
    test_name = File.basename(test_file).sub('.test', '')
    tests = JSON.parse(File.read(test_file))['tests']

    tests.each_with_index do |data, index|
      define_method('test_%s_%d' % [test_name, index + 1]) { tokenizer_test data }
    end
  end
end