diff --git a/vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb b/vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb index 2f11e2d8..387e987c 100755 --- a/vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb +++ b/vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb @@ -59,7 +59,8 @@ module HTML5lib begin require 'iconv' uString = Iconv.iconv('utf-8', @char_encoding, uString)[0] - rescue + rescue LoadError + rescue Exception end end @@ -206,21 +207,36 @@ module HTML5lib unless @queue.empty? return @queue.shift else - begin - @tell += 1 - c = @data_stream[@tell - 1] - case c - when 0xC2 .. 0xDF + @tell += 1 + c = @data_stream[@tell - 1] + case c + when 0xC2 .. 0xDF + if @data_stream[@tell .. @tell] =~ /[\x80-\xBF]/ @tell += 1 - c.chr + @data_stream[@tell-1].chr - when 0xE0 .. 0xF0 - @tell += 2 - c.chr + @data_stream[@tell-2].chr + @data_stream[@tell-1].chr + @data_stream[@tell-2..@tell-1] else - c.chr + [0xFFFD].pack('U') + end + when 0xE0 .. 0xEF + if @data_stream[@tell .. @tell+1] =~ /[\x80-\xBF]{2}/ + @tell += 2 + @data_stream[@tell-3..@tell-1] + else + [0xFFFD].pack('U') + end + when 0xF0 .. 0xF3 + if @data_stream[@tell .. @tell+2] =~ /[\x80-\xBF]{3}/ + @tell += 3 + @data_stream[@tell-4..@tell-1] + else + [0xFFFD].pack('U') + end + else + begin + c.chr + rescue + :EOF end - rescue - return :EOF end end end diff --git a/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb b/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb index 0f090191..ab133a36 100644 --- a/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb +++ b/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb @@ -1,5 +1,4 @@ require 'html5lib/constants' -require 'jcode' module HTML5lib @@ -309,7 +308,7 @@ class HTMLSerializer if @quote_attr_values or v.empty? quote_attr = true else - quote_attr = (SPACE_CHARACTERS.join('') + "<>\"'").each_char.any? {|c| v.include?(c)} + quote_attr = (SPACE_CHARACTERS + %w(< > " ')).any? {|c| v.include?(c)} end v = v.gsub("&", "&") if encoding diff --git a/vendor/plugins/HTML5lib/tests/preamble.rb b/vendor/plugins/HTML5lib/tests/preamble.rb index 164be8b1..17307e16 100644 --- a/vendor/plugins/HTML5lib/tests/preamble.rb +++ b/vendor/plugins/HTML5lib/tests/preamble.rb @@ -15,7 +15,7 @@ begin rescue LoadError class JSON def self.parse json - json.gsub! /"\s*:/, '"=>' + json.gsub!(/"\s*:/, '"=>') json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')} eval json end diff --git a/vendor/plugins/HTML5lib/tests/test_stream.rb b/vendor/plugins/HTML5lib/tests/test_stream.rb index e2d6fe78..ed5b535a 100755 --- a/vendor/plugins/HTML5lib/tests/test_stream.rb +++ b/vendor/plugins/HTML5lib/tests/test_stream.rb @@ -22,22 +22,28 @@ class HTMLInputStreamTest < Test::Unit::TestCase assert_equal("\xe2\x80\x98", stream.char) end - def test_char_win1252 - stream = HTMLInputStream.new("\x91") - assert_equal('windows-1252', stream.char_encoding) - assert_equal("\xe2\x80\x98", stream.char) - end - def test_bom stream = HTMLInputStream.new("\xef\xbb\xbf" + "'") assert_equal('utf-8', stream.char_encoding) assert_equal("'", stream.char) end - def test_utf_16 - stream = HTMLInputStream.new("\xff\xfe" + " \x00"*1025) - assert(stream.char_encoding, 'utf-16-le') - assert_equal(1025, stream.chars_until(' ',true).length) + begin + require 'iconv' + + def test_char_win1252 + stream = HTMLInputStream.new("\x91") + assert_equal('windows-1252', stream.char_encoding) + assert_equal("\xe2\x80\x98", stream.char) + end + + def test_utf_16 + stream = HTMLInputStream.new("\xff\xfe" + " \x00"*1025) + assert(stream.char_encoding, 'utf-16-le') + assert_equal(1025, stream.chars_until(' ',true).length) + end + rescue LoadError + puts "iconv not found, skipping iconv tests" end def test_newlines diff --git a/vendor/plugins/HTML5lib/tests/tokenizer_test_parser.rb b/vendor/plugins/HTML5lib/tests/tokenizer_test_parser.rb index d48c458f..5126fa11 100644 --- a/vendor/plugins/HTML5lib/tests/tokenizer_test_parser.rb +++ b/vendor/plugins/HTML5lib/tests/tokenizer_test_parser.rb @@ -11,7 +11,7 @@ class TokenizerTestParser debug = nil for token in @tokenizer debug = token.inspect if token[:type] == :ParseError - send ('process' + token[:type].to_s), token + send(('process' + token[:type].to_s), token) end return @outputTokens