Ruby 1.9 Compatibility

Completely removed the html5lib sanitizer.
Fixed the string-handling to work in both
Ruby 1.8.x and 1.9.2. There are still,
inexplicably, two functional tests that
fail. But the rest seems to work quite well.
This commit is contained in:
Jacques Distler 2009-11-30 16:28:18 -06:00
parent 79c8572053
commit a6429f8c22
142 changed files with 519 additions and 843 deletions

View file

@ -0,0 +1,154 @@
module HTML5
# Base class for helper objects that implement each phase of processing.
#
# Handler methods should be in the following order (they can be omitted):
#
# * EOF
# * Comment
# * Doctype
# * SpaceCharacters
# * Characters
# * StartTag
# - startTag* methods
# * EndTag
# - endTag* methods
#
class Phase
extend Forwardable
def_delegators :@parser, :parse_error
# The following example call:
#
# tag_handlers('startTag', 'html', %w( base link meta ), %w( li dt dd ) => 'ListItem')
#
# ...would return a hash equal to this:
#
# { 'html' => 'startTagHtml',
# 'base' => 'startTagBaseLinkMeta',
# 'link' => 'startTagBaseLinkMeta',
# 'meta' => 'startTagBaseLinkMeta',
# 'li' => 'startTagListItem',
# 'dt' => 'startTagListItem',
# 'dd' => 'startTagListItem' }
#
def self.tag_handlers(prefix, *tags)
mapping = {}
if tags.last.is_a?(Hash)
tags.pop.each do |names, handler_method_suffix|
handler_method = prefix + handler_method_suffix
Array(names).each {|name| mapping[name] = handler_method }
end
end
tags.each do |names|
names = Array(names)
handler_method = prefix + names.map {|name| name.capitalize }.join
names.each {|name| mapping[name] = handler_method }
end
mapping
end
def self.start_tag_handlers
@start_tag_handlers ||= Hash.new('startTagOther')
end
# Declare what start tags this Phase handles. Can be called more than once.
#
# Example usage:
#
# handle_start 'html'
# # html start tags will be handled by a method named 'startTagHtml'
#
# handle_start %( base link meta )
# # base, link and meta start tags will be handled by a method named 'startTagBaseLinkMeta'
#
# handle_start %( li dt dd ) => 'ListItem'
# # li, dt, and dd start tags will be handled by a method named 'startTagListItem'
#
def self.handle_start(*tags)
start_tag_handlers.update tag_handlers('startTag', *tags)
end
def self.end_tag_handlers
@end_tag_handlers ||= Hash.new('endTagOther')
end
# Declare what end tags this Phase handles. Behaves like handle_start.
#
def self.handle_end(*tags)
end_tag_handlers.update tag_handlers('endTag', *tags)
end
def initialize(parser, tree)
@parser, @tree = parser, tree
end
def process_eof
@tree.generateImpliedEndTags
if @tree.open_elements.length > 2
parse_error("expected-closing-tag-but-got-eof")
elsif @tree.open_elements.length == 2 and @tree.open_elements[1].name != 'body'
# This happens for framesets or something?
parse_error("expected-closing-tag-but-got-eof")
elsif @parser.inner_html and @tree.open_elements.length > 1
# XXX This is not what the specification says. Not sure what to do here.
parse_error("eof-in-innerhtml")
end
# Betting ends.
end
def processComment(data)
# For most phases the following is correct. Where it's not it will be
# overridden.
@tree.insert_comment(data, @tree.open_elements.last)
end
def processDoctype(name, publicId, systemId, correct)
parse_error("unexpected-doctype")
end
def processSpaceCharacters(data)
@tree.insertText(data)
end
def processStartTag(name, attributes)
send self.class.start_tag_handlers[name], name, attributes
end
def startTagHtml(name, attributes)
if @parser.first_start_tag == false and name == 'html'
parse_error("non-html-root")
end
# XXX Need a check here to see if the first start tag token emitted is
# this token... If it's not, invoke parse_error.
attributes.each do |attr, value|
unless @tree.open_elements.first.attributes.has_key?(attr)
@tree.open_elements.first.attributes[attr] = value
end
end
@parser.first_start_tag = false
end
def processEndTag(name)
send self.class.end_tag_handlers[name], name
end
def assert(value)
throw AssertionError.new unless value
end
def in_scope?(*args)
@tree.elementInScope(*args)
end
def remove_open_elements_until(name=nil)
finished = false
until finished || @tree.open_elements.length == 0
element = @tree.open_elements.pop
finished = name.nil? ? yield(element) : element.name == name
end
return element
end
end
end