Update to latest HTML5lib, Add Maruku testdir

Sync with the latest html5lib.
Having the Maruku unit tests on hand may be useful for debugging, so let's include them.
This commit is contained in:
Jacques Distler 2008-01-08 00:01:35 -06:00
parent ebc409e1a0
commit 1085168bbf
337 changed files with 21290 additions and 72 deletions

View file

@ -1,13 +0,0 @@
module Kernel
  # Runs the given block with verbose warnings switched off.
  #
  # When $VERBOSE is truthy it is set to false while the block runs and is
  # put back to its previous value afterwards, even if the block raises.
  # When $VERBOSE is already false or nil the block simply runs.
  #
  # Returns the value of the block.
  def silence
    return yield unless $VERBOSE

    previous = $VERBOSE
    begin
      $VERBOSE = false
      yield
    ensure
      # Restore on every exit path so an exception cannot leave warnings off.
      $VERBOSE = previous
    end
  end
end

View file

@ -1,17 +0,0 @@
class String
  alias old_format %

  # Formats the receiver.
  #
  # data - a Hash, or anything the built-in String#% accepts.
  #
  # With a Hash, every literal "%(key)" in the receiver is replaced by the
  # matching value (converted with to_s) and a new String is returned; the
  # receiver is left untouched, so frozen strings work too. Keys are matched
  # literally, never as regular-expression source.
  #
  # With any other argument the built-in formatter (old_format) is used with
  # warnings turned down, and $VERBOSE is put back to whatever it was before
  # the call instead of being forced to true.
  def %(data)
    if data.kind_of?(Hash)
      data.inject(dup) do |text, (key, value)|
        # Block form keeps backslashes in the value from being read as
        # back-references.
        text.gsub("%(#{key})") { value.to_s }
      end
    else
      previous = $VERBOSE
      begin
        $VERBOSE = false
        old_format(data)
      ensure
        $VERBOSE = previous
      end
    end
  end
end

View file

@ -1,6 +1,5 @@
$:.unshift File.dirname(__FILE__), 'lib'
require 'html5'
require 'core_ext/string'
require 'ostruct'
require 'optparse'
@ -190,7 +189,7 @@ module HTML5::CLI
t1 = Time.new
print_output(p, document, opts)
t2 = Time.new
puts "\n\nRun took: %fs (plus %fs to print the output)"%[t1-t0, t2-t1]
puts "\n\nRun took: #{t1-t0}s (plus #{t2-t1}s to print the output)"
else
document = p.send(opts.parsemethod, *args)
print_output(p, document, opts)
@ -218,14 +217,32 @@ module HTML5::CLI
if opts.error
errList=[]
for pos, errorcode, datavars in parser.errors
errList << "Line #{pos[0]} Col #{pos[1]} " + (HTML5::E[errorcode] || "Unknown error \"#{errorcode}\"") % datavars
formatstring = HTML5::E[errorcode] || 'Unknown error "%(errorcode)"'
message = PythonicTemplate.new(formatstring).to_s(datavars)
errList << "Line #{pos[0]} Col #{pos[1]} " + message
end
$stdout.write("\nParse errors:\n" + errList.join("\n")+"\n")
end
end
# Fills Python-style "%(name)" placeholders in a format string.
#
# The template is treated purely as data: nothing in the format string or in
# the substituted values is ever evaluated as Ruby code, and the string passed
# to the constructor is never modified (so shared message constants stay
# intact when reused).
class PythonicTemplate
  # Matches a single "%(name)" placeholder and captures the name.
  PLACEHOLDER = /%\((\w+)\)/

  # format - the String containing "%(name)" placeholders.
  def initialize(format)
    # Work on a private copy so the caller's string is left alone.
    @format = format.dup
    @values = {}
  end

  # Renders the template.
  #
  # vars - optional Hash of placeholder name (String or Symbol) => value.
  #        Values supplied on earlier calls are remembered by this instance.
  #
  # Returns a new String; placeholders with no known value become "".
  def to_s(vars = nil)
    vars.each { |var, value| @values[var.to_s] = value } if vars
    @format.gsub(PLACEHOLDER) { @values[Regexp.last_match(1)].to_s }
  end
end
# Entry point: builds the options from ARGV via parse_opts, then passes
# those options together with ARGV on to parse.
def self.run
  parse(parse_opts(ARGV), ARGV)
end
end
end

View file

@ -46,8 +46,8 @@ module HTML5
@tree = TreeBuilders::REXML::TreeBuilder
options.each {|name, value| instance_variable_set("@#{name}", value) }
@lowercase_attr_name = nil unless instance_variables.include?("@lowercase_attr_name")
@lowercase_element_name = nil unless instance_variables.include?("@lowercase_element_name")
@lowercase_attr_name = nil unless instance_variable_defined?("@lowercase_attr_name")
@lowercase_element_name = nil unless instance_variable_defined?("@lowercase_element_name")
@tree = @tree.new

View file

@ -1,5 +1,4 @@
require 'html5/html5parser/phase'
require 'core_ext/kernel'
module HTML5
class InBodyPhase < Phase
@ -51,10 +50,8 @@ module HTML5
super(parser, tree)
# for special handling of whitespace in <pre>
silence do
class << self
alias processSpaceCharactersNonPre processSpaceCharacters
end
class << self
alias processSpaceCharactersNonPre processSpaceCharacters
end
end
@ -62,9 +59,8 @@ module HTML5
# #Sometimes (start of <pre> blocks) we want to drop leading newlines
class << self
silence do
alias processSpaceCharacters processSpaceCharactersNonPre
end
remove_method :processSpaceCharacters rescue nil
alias processSpaceCharacters processSpaceCharactersNonPre
end
if (data.length > 0 and data[0] == ?\n &&
@ -119,9 +115,8 @@ module HTML5
@tree.insert_element(name, attributes)
if name == 'pre'
class << self
silence do
alias processSpaceCharacters processSpaceCharactersDropNewline
end
remove_method :processSpaceCharacters rescue nil
alias processSpaceCharacters processSpaceCharactersDropNewline
end
end
end
@ -293,7 +288,10 @@ module HTML5
# XXX Form element pointer checking here as well...
@tree.insert_element(name, attributes)
@parser.tokenizer.content_model_flag = :RCDATA
class << self; alias processSpaceCharacters processSpaceCharactersDropNewline; end
class << self
remove_method :processSpaceCharacters rescue nil
alias processSpaceCharacters processSpaceCharactersDropNewline
end
end
# iframe, noembed noframes, noscript(if scripting enabled)

View file

@ -33,6 +33,11 @@ module HTML5
options.each {|name, value| instance_variable_set("@#{name}", value) }
# partial Ruby 1.9 support
if @encoding and source.respond_to? :force_encoding
source.force_encoding(@encoding) rescue nil
end
# Raw Stream
@raw_stream = open_stream(source)
@ -265,6 +270,38 @@ module HTML5
@tell += 1
case c
when String
# partial Ruby 1.9 support
case c
when "\0"
@errors.push("null-character")
c = "\uFFFD" # null characters are invalid
when "\r"
@tell += 1 if @buffer[@tell] == "\n"
c = "\n"
when "\x80" .. "\x9F"
c = ''.force_encoding('UTF-8') << ENTITIES_WINDOWS1252[c.ord-0x80]
end
if c == "\x0D"
# normalize newlines
@tell += 1 if @buffer[@tell] == 0x0A
c = 0x0A
end
# update position in stream
if c == "\x0a"
@line_lengths << @col
@line += 1
@col = 0
else
@col += 1
end
# binary utf-8
c.ord > 126 ? [c.ord].pack('U') : c
when 0x01..0x7F
if c == 0x0D
# normalize newlines
@ -293,7 +330,7 @@ module HTML5
end
when 0xC0..0xFF
if instance_variables.include?("@win1252") && @win1252
if instance_variable_defined?("@win1252") && @win1252
"\xC3" + (c - 64).chr # convert to utf-8
# from http://www.w3.org/International/questions/qa-forms-utf-8.en.php
elsif @buffer[@tell - 1..@tell + 3] =~ /^
@ -340,7 +377,12 @@ module HTML5
end
# Pushes characters back onto the front of @queue so they are the next ones
# taken from it, keeping their original order.
#
# characters - :EOF (ignored), an object responding to to_a whose elements
#              are pushed back, or a String that is pushed back one
#              character at a time.
def unget(characters)
  return if characters == :EOF

  pending =
    if characters.respond_to?(:to_a)
      characters.to_a
    else
      # Strings without to_a are split into individual characters.
      characters.each_char.to_a
    end
  # Single unshift only: the earlier unconditional unshift is gone, so
  # nothing is queued twice.
  @queue.unshift(*pending)
end
end

View file

@ -31,7 +31,7 @@ module HTML5
@inject_meta_charset = true
options.each do |name, value|
next unless instance_variables.include?("@#{name}")
next unless instance_variable_defined?("@#{name}")
@use_best_quote_char = false if name.to_s == 'quote_char'
instance_variable_set("@#{name}", value)
end

View file

@ -1,3 +1,3 @@
module HTML5
  # Version string of this library.
  VERSION = '0.10.1'
end