diff --git a/vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb b/vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb
index 2f11e2d8..387e987c 100755
--- a/vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb
@@ -59,7 +59,8 @@ module HTML5lib
begin
require 'iconv'
uString = Iconv.iconv('utf-8', @char_encoding, uString)[0]
- rescue
+ rescue LoadError
+ rescue Exception
end
end
@@ -206,21 +207,36 @@ module HTML5lib
unless @queue.empty?
return @queue.shift
else
- begin
- @tell += 1
- c = @data_stream[@tell - 1]
- case c
- when 0xC2 .. 0xDF
+ @tell += 1
+ c = @data_stream[@tell - 1]
+ case c
+ when 0xC2 .. 0xDF
+ if @data_stream[@tell .. @tell] =~ /[\x80-\xBF]/
@tell += 1
- c.chr + @data_stream[@tell-1].chr
- when 0xE0 .. 0xF0
- @tell += 2
- c.chr + @data_stream[@tell-2].chr + @data_stream[@tell-1].chr
+ @data_stream[@tell-2..@tell-1]
else
- c.chr
+ [0xFFFD].pack('U')
+ end
+ when 0xE0 .. 0xEF
+ if @data_stream[@tell .. @tell+1] =~ /[\x80-\xBF]{2}/
+ @tell += 2
+ @data_stream[@tell-3..@tell-1]
+ else
+ [0xFFFD].pack('U')
+ end
+ when 0xF0 .. 0xF3
+ if @data_stream[@tell .. @tell+2] =~ /[\x80-\xBF]{3}/
+ @tell += 3
+ @data_stream[@tell-4..@tell-1]
+ else
+ [0xFFFD].pack('U')
+ end
+ else
+ begin
+ c.chr
+ rescue
+ :EOF
end
- rescue
- return :EOF
end
end
end
diff --git a/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb b/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb
index 0f090191..ab133a36 100644
--- a/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb
+++ b/vendor/plugins/HTML5lib/lib/html5lib/serializer.rb
@@ -1,5 +1,4 @@
require 'html5lib/constants'
-require 'jcode'
module HTML5lib
@@ -309,7 +308,7 @@ class HTMLSerializer
if @quote_attr_values or v.empty?
quote_attr = true
else
- quote_attr = (SPACE_CHARACTERS.join('') + "<>\"'").each_char.any? {|c| v.include?(c)}
+ quote_attr = (SPACE_CHARACTERS + %w(< > " ')).any? {|c| v.include?(c)}
end
v = v.gsub("&", "&")
if encoding
diff --git a/vendor/plugins/HTML5lib/tests/preamble.rb b/vendor/plugins/HTML5lib/tests/preamble.rb
index 164be8b1..17307e16 100644
--- a/vendor/plugins/HTML5lib/tests/preamble.rb
+++ b/vendor/plugins/HTML5lib/tests/preamble.rb
@@ -15,7 +15,7 @@ begin
rescue LoadError
class JSON
def self.parse json
- json.gsub! /"\s*:/, '"=>'
+ json.gsub!(/"\s*:/, '"=>')
json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
eval json
end
diff --git a/vendor/plugins/HTML5lib/tests/test_stream.rb b/vendor/plugins/HTML5lib/tests/test_stream.rb
index e2d6fe78..ed5b535a 100755
--- a/vendor/plugins/HTML5lib/tests/test_stream.rb
+++ b/vendor/plugins/HTML5lib/tests/test_stream.rb
@@ -22,22 +22,28 @@ class HTMLInputStreamTest < Test::Unit::TestCase
assert_equal("\xe2\x80\x98", stream.char)
end
- def test_char_win1252
- stream = HTMLInputStream.new("\x91")
- assert_equal('windows-1252', stream.char_encoding)
- assert_equal("\xe2\x80\x98", stream.char)
- end
-
def test_bom
stream = HTMLInputStream.new("\xef\xbb\xbf" + "'")
assert_equal('utf-8', stream.char_encoding)
assert_equal("'", stream.char)
end
- def test_utf_16
- stream = HTMLInputStream.new("\xff\xfe" + " \x00"*1025)
- assert(stream.char_encoding, 'utf-16-le')
- assert_equal(1025, stream.chars_until(' ',true).length)
+ begin
+ require 'iconv'
+
+ def test_char_win1252
+ stream = HTMLInputStream.new("\x91")
+ assert_equal('windows-1252', stream.char_encoding)
+ assert_equal("\xe2\x80\x98", stream.char)
+ end
+
+ def test_utf_16
+ stream = HTMLInputStream.new("\xff\xfe" + " \x00"*1025)
+ assert(stream.char_encoding, 'utf-16-le')
+ assert_equal(1025, stream.chars_until(' ',true).length)
+ end
+ rescue LoadError
+ puts "iconv not found, skipping iconv tests"
end
def test_newlines
diff --git a/vendor/plugins/HTML5lib/tests/tokenizer_test_parser.rb b/vendor/plugins/HTML5lib/tests/tokenizer_test_parser.rb
index d48c458f..5126fa11 100644
--- a/vendor/plugins/HTML5lib/tests/tokenizer_test_parser.rb
+++ b/vendor/plugins/HTML5lib/tests/tokenizer_test_parser.rb
@@ -11,7 +11,7 @@ class TokenizerTestParser
debug = nil
for token in @tokenizer
debug = token.inspect if token[:type] == :ParseError
- send ('process' + token[:type].to_s), token
+ send(('process' + token[:type].to_s), token)
end
return @outputTokens