Update to latest HTML5lib, Add Maruku testdir
Sync with the latest html5lib. Having the Maruku unit tests on hand may be useful for debugging, so let's include them.
This commit is contained in:
parent
ebc409e1a0
commit
1085168bbf
337 changed files with 21290 additions and 72 deletions
25
vendor/plugins/HTML5lib/lib/html5/cli.rb
vendored
25
vendor/plugins/HTML5lib/lib/html5/cli.rb
vendored
|
@ -1,6 +1,5 @@
|
|||
$:.unshift File.dirname(__FILE__), 'lib'
|
||||
require 'html5'
|
||||
require 'core_ext/string'
|
||||
require 'ostruct'
|
||||
require 'optparse'
|
||||
|
||||
|
@ -190,7 +189,7 @@ module HTML5::CLI
|
|||
t1 = Time.new
|
||||
print_output(p, document, opts)
|
||||
t2 = Time.new
|
||||
puts "\n\nRun took: %fs (plus %fs to print the output)"%[t1-t0, t2-t1]
|
||||
puts "\n\nRun took: #{t1-t0}s (plus #{t2-t1}s to print the output)"
|
||||
else
|
||||
document = p.send(opts.parsemethod, *args)
|
||||
print_output(p, document, opts)
|
||||
|
@ -218,14 +217,32 @@ module HTML5::CLI
|
|||
if opts.error
|
||||
errList=[]
|
||||
for pos, errorcode, datavars in parser.errors
|
||||
errList << "Line #{pos[0]} Col #{pos[1]} " + (HTML5::E[errorcode] || "Unknown error \"#{errorcode}\"") % datavars
|
||||
formatstring = HTML5::E[errorcode] || 'Unknown error "%(errorcode)"'
|
||||
message = PythonicTemplate.new(formatstring).to_s(datavars)
|
||||
errList << "Line #{pos[0]} Col #{pos[1]} " + message
|
||||
end
|
||||
$stdout.write("\nParse errors:\n" + errList.join("\n")+"\n")
|
||||
end
|
||||
end
|
||||
|
||||
# Renders Python-style "%(name)" placeholders in a message format string.
#
# Usage: PythonicTemplate.new('Bad tag "%(name)"').to_s('name' => 'div')
#
# The substitution is done textually; the format is never passed to eval,
# so quotes, backslashes and '#{...}' sequences in the format are emitted
# verbatim instead of being interpreted as Ruby source.
class PythonicTemplate
  # Matches one "%(name)" placeholder; capture 1 is the variable name.
  PLACEHOLDER = /%\((\w+)\)/

  # format - the message text containing zero or more "%(name)" placeholders.
  def initialize(format)
    # Work on a private copy: the caller's string (e.g. a shared error-message
    # table entry) must not be modified, and may be frozen.
    @format = format.dup
    # Values supplied so far, keyed by variable name as a String. They persist
    # across to_s calls on the same instance.
    @values = {}
  end

  # Returns the format with every placeholder replaced by its value.
  #
  # vars - optional Hash-like collection of name => value pairs; names may be
  #        Strings or Symbols. Values are converted with to_s.
  #
  # A placeholder with no known value is replaced by an empty string.
  def to_s(vars = nil)
    vars.each { |name, value| @values[name.to_s] = value } if vars
    @format.gsub(PLACEHOLDER) { @values[Regexp.last_match(1)].to_s }
  end
end
|
||||
|
||||
# Command-line entry point: hands ARGV to parse_opts, then passes the
# resulting options together with ARGV on to parse.
def self.run
  parse(parse_opts(ARGV), ARGV)
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -46,8 +46,8 @@ module HTML5
|
|||
@tree = TreeBuilders::REXML::TreeBuilder
|
||||
|
||||
options.each {|name, value| instance_variable_set("@#{name}", value) }
|
||||
@lowercase_attr_name = nil unless instance_variables.include?("@lowercase_attr_name")
|
||||
@lowercase_element_name = nil unless instance_variables.include?("@lowercase_element_name")
|
||||
@lowercase_attr_name = nil unless instance_variable_defined?("@lowercase_attr_name")
|
||||
@lowercase_element_name = nil unless instance_variable_defined?("@lowercase_element_name")
|
||||
|
||||
@tree = @tree.new
|
||||
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
require 'html5/html5parser/phase'
|
||||
require 'core_ext/kernel'
|
||||
|
||||
module HTML5
|
||||
class InBodyPhase < Phase
|
||||
|
@ -51,10 +50,8 @@ module HTML5
|
|||
super(parser, tree)
|
||||
|
||||
# for special handling of whitespace in <pre>
|
||||
silence do
|
||||
class << self
|
||||
alias processSpaceCharactersNonPre processSpaceCharacters
|
||||
end
|
||||
class << self
|
||||
alias processSpaceCharactersNonPre processSpaceCharacters
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -62,9 +59,8 @@ module HTML5
|
|||
# #Sometimes (start of <pre> blocks) we want to drop leading newlines
|
||||
|
||||
class << self
|
||||
silence do
|
||||
alias processSpaceCharacters processSpaceCharactersNonPre
|
||||
end
|
||||
remove_method :processSpaceCharacters rescue nil
|
||||
alias processSpaceCharacters processSpaceCharactersNonPre
|
||||
end
|
||||
|
||||
if (data.length > 0 and data[0] == ?\n &&
|
||||
|
@ -119,9 +115,8 @@ module HTML5
|
|||
@tree.insert_element(name, attributes)
|
||||
if name == 'pre'
|
||||
class << self
|
||||
silence do
|
||||
alias processSpaceCharacters processSpaceCharactersDropNewline
|
||||
end
|
||||
remove_method :processSpaceCharacters rescue nil
|
||||
alias processSpaceCharacters processSpaceCharactersDropNewline
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -293,7 +288,10 @@ module HTML5
|
|||
# XXX Form element pointer checking here as well...
|
||||
@tree.insert_element(name, attributes)
|
||||
@parser.tokenizer.content_model_flag = :RCDATA
|
||||
class << self; alias processSpaceCharacters processSpaceCharactersDropNewline; end
|
||||
class << self
|
||||
remove_method :processSpaceCharacters rescue nil
|
||||
alias processSpaceCharacters processSpaceCharactersDropNewline
|
||||
end
|
||||
end
|
||||
|
||||
# iframe, noembed noframes, noscript(if scripting enabled)
|
||||
|
|
46
vendor/plugins/HTML5lib/lib/html5/inputstream.rb
vendored
46
vendor/plugins/HTML5lib/lib/html5/inputstream.rb
vendored
|
@ -33,6 +33,11 @@ module HTML5
|
|||
|
||||
options.each {|name, value| instance_variable_set("@#{name}", value) }
|
||||
|
||||
# partial Ruby 1.9 support
|
||||
if @encoding and source.respond_to? :force_encoding
|
||||
source.force_encoding(@encoding) rescue nil
|
||||
end
|
||||
|
||||
# Raw Stream
|
||||
@raw_stream = open_stream(source)
|
||||
|
||||
|
@ -265,6 +270,38 @@ module HTML5
|
|||
@tell += 1
|
||||
|
||||
case c
|
||||
|
||||
when String
|
||||
# partial Ruby 1.9 support
|
||||
case c
|
||||
when "\0"
|
||||
@errors.push("null-character")
|
||||
c = "\uFFFD" # null characters are invalid
|
||||
when "\r"
|
||||
@tell += 1 if @buffer[@tell] == "\n"
|
||||
c = "\n"
|
||||
when "\x80" .. "\x9F"
|
||||
c = ''.force_encoding('UTF-8') << ENTITIES_WINDOWS1252[c.ord-0x80]
|
||||
end
|
||||
|
||||
if c == "\x0D"
|
||||
# normalize newlines
|
||||
@tell += 1 if @buffer[@tell] == 0x0A
|
||||
c = 0x0A
|
||||
end
|
||||
|
||||
# update position in stream
|
||||
if c == "\x0a"
|
||||
@line_lengths << @col
|
||||
@line += 1
|
||||
@col = 0
|
||||
else
|
||||
@col += 1
|
||||
end
|
||||
|
||||
# binary utf-8
|
||||
c.ord > 126 ? [c.ord].pack('U') : c
|
||||
|
||||
when 0x01..0x7F
|
||||
if c == 0x0D
|
||||
# normalize newlines
|
||||
|
@ -293,7 +330,7 @@ module HTML5
|
|||
end
|
||||
|
||||
when 0xC0..0xFF
|
||||
if instance_variables.include?("@win1252") && @win1252
|
||||
if instance_variable_defined?("@win1252") && @win1252
|
||||
"\xC3" + (c - 64).chr # convert to utf-8
|
||||
# from http://www.w3.org/International/questions/qa-forms-utf-8.en.php
|
||||
elsif @buffer[@tell - 1..@tell + 3] =~ /^
|
||||
|
@ -340,7 +377,12 @@ module HTML5
|
|||
end
|
||||
|
||||
def unget(characters)
|
||||
@queue.unshift(*characters.to_a) unless characters == :EOF
|
||||
return if characters == :EOF
|
||||
if characters.respond_to? :to_a
|
||||
@queue.unshift(*characters.to_a)
|
||||
else
|
||||
characters.reverse.each_char {|c| @queue.unshift(c)}
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@ module HTML5
|
|||
@inject_meta_charset = true
|
||||
|
||||
options.each do |name, value|
|
||||
next unless instance_variables.include?("@#{name}")
|
||||
next unless instance_variable_defined?("@#{name}")
|
||||
@use_best_quote_char = false if name.to_s == 'quote_char'
|
||||
instance_variable_set("@#{name}", value)
|
||||
end
|
||||
|
|
4
vendor/plugins/HTML5lib/lib/html5/version.rb
vendored
4
vendor/plugins/HTML5lib/lib/html5/version.rb
vendored
|
@ -1,3 +1,3 @@
|
|||
module HTML5
|
||||
VERSION = '0.10.0'
|
||||
end
|
||||
VERSION = '0.10.1'
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue