Sync with Latest HTML5lib

Some more tweaks
This commit is contained in:
Jacques Distler 2007-06-06 08:12:03 -05:00
parent fd183eac04
commit 8846b2cda5
5 changed files with 48 additions and 27 deletions

View file

@ -15,7 +15,7 @@ begin
rescue LoadError
class JSON
def self.parse json
json.gsub! /"\s*:/, '"=>'
json.gsub!(/"\s*:/, '"=>')
json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
eval json
end

View file

@ -22,22 +22,28 @@ class HTMLInputStreamTest < Test::Unit::TestCase
assert_equal("\xe2\x80\x98", stream.char)
end
def test_char_win1252
stream = HTMLInputStream.new("\x91")
assert_equal('windows-1252', stream.char_encoding)
assert_equal("\xe2\x80\x98", stream.char)
end
def test_bom
stream = HTMLInputStream.new("\xef\xbb\xbf" + "'")
assert_equal('utf-8', stream.char_encoding)
assert_equal("'", stream.char)
end
def test_utf_16
stream = HTMLInputStream.new("\xff\xfe" + " \x00"*1025)
assert(stream.char_encoding, 'utf-16-le')
assert_equal(1025, stream.chars_until(' ',true).length)
begin
require 'iconv'
def test_char_win1252
stream = HTMLInputStream.new("\x91")
assert_equal('windows-1252', stream.char_encoding)
assert_equal("\xe2\x80\x98", stream.char)
end
def test_utf_16
stream = HTMLInputStream.new("\xff\xfe" + " \x00"*1025)
assert(stream.char_encoding, 'utf-16-le')
assert_equal(1025, stream.chars_until(' ',true).length)
end
rescue LoadError
puts "iconv not found, skipping iconv tests"
end
def test_newlines

View file

@ -11,7 +11,7 @@ class TokenizerTestParser
debug = nil
for token in @tokenizer
debug = token.inspect if token[:type] == :ParseError
send ('process' + token[:type].to_s), token
send(('process' + token[:type].to_s), token)
end
return @outputTokens