Update to latest HTML5lib, Add Maruku testdir
Sync with the latest html5lib. Having the Maruku unit tests on-hand may be useful for debugging; so let's include them.
This commit is contained in:
parent
ebc409e1a0
commit
1085168bbf
337 changed files with 21290 additions and 72 deletions
46
vendor/plugins/HTML5lib/lib/html5/inputstream.rb
vendored
46
vendor/plugins/HTML5lib/lib/html5/inputstream.rb
vendored
|
@ -33,6 +33,11 @@ module HTML5
|
|||
|
||||
options.each {|name, value| instance_variable_set("@#{name}", value) }
|
||||
|
||||
# partial Ruby 1.9 support
|
||||
if @encoding and source.respond_to? :force_encoding
|
||||
source.force_encoding(@encoding) rescue nil
|
||||
end
|
||||
|
||||
# Raw Stream
|
||||
@raw_stream = open_stream(source)
|
||||
|
||||
|
@ -265,6 +270,38 @@ module HTML5
|
|||
@tell += 1
|
||||
|
||||
case c
|
||||
|
||||
when String
|
||||
# partial Ruby 1.9 support
|
||||
case c
|
||||
when "\0"
|
||||
@errors.push("null-character")
|
||||
c = "\uFFFD" # null characters are invalid
|
||||
when "\r"
|
||||
@tell += 1 if @buffer[@tell] == "\n"
|
||||
c = "\n"
|
||||
when "\x80" .. "\x9F"
|
||||
c = ''.force_encoding('UTF-8') << ENTITIES_WINDOWS1252[c.ord-0x80]
|
||||
end
|
||||
|
||||
if c == "\x0D"
|
||||
# normalize newlines
|
||||
@tell += 1 if @buffer[@tell] == 0x0A
|
||||
c = 0x0A
|
||||
end
|
||||
|
||||
# update position in stream
|
||||
if c == "\x0a"
|
||||
@line_lengths << @col
|
||||
@line += 1
|
||||
@col = 0
|
||||
else
|
||||
@col += 1
|
||||
end
|
||||
|
||||
# binary utf-8
|
||||
c.ord > 126 ? [c.ord].pack('U') : c
|
||||
|
||||
when 0x01..0x7F
|
||||
if c == 0x0D
|
||||
# normalize newlines
|
||||
|
@ -293,7 +330,7 @@ module HTML5
|
|||
end
|
||||
|
||||
when 0xC0..0xFF
|
||||
if instance_variables.include?("@win1252") && @win1252
|
||||
if instance_variable_defined?("@win1252") && @win1252
|
||||
"\xC3" + (c - 64).chr # convert to utf-8
|
||||
# from http://www.w3.org/International/questions/qa-forms-utf-8.en.php
|
||||
elsif @buffer[@tell - 1..@tell + 3] =~ /^
|
||||
|
@ -340,7 +377,12 @@ module HTML5
|
|||
end
|
||||
|
||||
def unget(characters)
|
||||
@queue.unshift(*characters.to_a) unless characters == :EOF
|
||||
return if characters == :EOF
|
||||
if characters.respond_to? :to_a
|
||||
@queue.unshift(*characters.to_a)
|
||||
else
|
||||
characters.reverse.each_char {|c| @queue.unshift(c)}
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue