Ruby 1.9 Compatibility

Completely removed the html5lib sanitizer.
Fixed the string-handling to work in both
Ruby 1.8.x and 1.9.2. There are still,
inexplicably, two functional tests that
fail. But the rest seems to work quite well.
This commit is contained in:
Jacques Distler 2009-11-30 16:28:18 -06:00
parent 79c8572053
commit a6429f8c22
142 changed files with 519 additions and 843 deletions

View file

@ -0,0 +1,334 @@
require 'html5/constants'
#XXX - TODO; make the default interface more ElementTree-like rather than DOM-like
module HTML5
# The scope markers are inserted when entering buttons, object elements,
# marquees, table cells, and table captions, and are used to prevent formatting
# from "leaking" into tables, buttons, object elements, and marquees.
Marker = nil
module TreeBuilders
module Base
class Node
# The parent of the current node (or nil for the document node)
attr_accessor :parent
# a list of child nodes of the current node. This must
# include all elements but not necessarily other node types
attr_accessor :childNodes
# A list of miscellaneous flags that can be set on the node
attr_accessor :_flags
def initialize(name)
@parent = nil
@childNodes = []
@_flags = []
end
# Insert node as a child of the current node
def appendChild(node)
raise NotImplementedError
end
# Insert data as text in the current node, positioned before the
# start of node insertBefore or to the end of the node's text.
def insertText(data, insertBefore=nil)
raise NotImplementedError
end
# Insert node as a child of the current node, before refNode in the
# list of child nodes. Raises ValueError if refNode is not a child of
# the current node
def insertBefore(node, refNode)
raise NotImplementedError
end
# Remove node from the children of the current node
def removeChild(node)
raise NotImplementedError
end
# Move all the children of the current node to newParent.
# This is needed so that trees that don't store text as nodes move the
# text in the correct way
def reparentChildren(newParent)
#XXX - should this method be made more general?
@childNodes.each { |child| newParent.appendChild(child) }
@childNodes = []
end
# Return a shallow copy of the current node i.e. a node with the same
# name and attributes but with no parent or child nodes
def cloneNode
raise NotImplementedError
end
# Return true if the node has children or text, false otherwise
def hasContent
raise NotImplementedError
end
end
# Base treebuilder implementation
class TreeBuilder
attr_accessor :open_elements
attr_accessor :activeFormattingElements
attr_accessor :document
attr_accessor :head_pointer
attr_accessor :formPointer
# Class to use for document root
documentClass = nil
# Class to use for HTML elements
elementClass = nil
# Class to use for comments
commentClass = nil
# Class to use for doctypes
doctypeClass = nil
# Fragment class
fragmentClass = nil
def initialize
reset
end
def reset
@open_elements = []
@activeFormattingElements = []
#XXX - rename these to headElement, formElement
@head_pointer = nil
@formPointer = nil
self.insert_from_table = false
@document = @documentClass.new
end
def elementInScope(target, tableVariant=false)
# Exit early when possible.
return true if @open_elements[-1].name == target
# AT How about while true and simply set node to [-1] and set it to
# [-2] at the end...
@open_elements.reverse.each do |element|
if element.name == target
return true
elsif element.name == 'table'
return false
elsif not tableVariant and SCOPING_ELEMENTS.include?(element.name)
return false
elsif element.name == 'html'
return false
end
end
assert false # We should never reach this point
end
def reconstructActiveFormattingElements
# Within this algorithm the order of steps described in the
# specification is not quite the same as the order of steps in the
# code. It should still do the same though.
# Step 1: stop the algorithm when there's nothing to do.
return if @activeFormattingElements.empty?
# Step 2 and step 3: we start with the last element. So i is -1.
i = -1
entry = @activeFormattingElements[i]
return if entry == Marker or @open_elements.include?(entry)
# Step 6
until entry == Marker or @open_elements.include?(entry)
# Step 5: let entry be one earlier in the list.
i -= 1
begin
entry = @activeFormattingElements[i]
rescue
# Step 4: at this point we need to jump to step 8. By not doing
# i += 1 which is also done in step 7 we achieve that.
break
end
end
while true
# Step 7
i += 1
# Step 8
clone = @activeFormattingElements[i].cloneNode
# Step 9
element = insert_element(clone.name, clone.attributes)
# Step 10
@activeFormattingElements[i] = element
# Step 11
break if element == @activeFormattingElements[-1]
end
end
def clearActiveFormattingElements
{} until @activeFormattingElements.empty? || @activeFormattingElements.pop == Marker
end
# Check if an element exists between the end of the active
# formatting elements and the last marker. If it does, return it, else
# return false
def elementInActiveFormattingElements(name)
@activeFormattingElements.reverse.each do |element|
# Check for Marker first because if it's a Marker it doesn't have a
# name attribute.
break if element == Marker
return element if element.name == name
end
return false
end
def insertDoctype(name, public_id, system_id)
doctype = @doctypeClass.new(name)
doctype.public_id = public_id
doctype.system_id = system_id
@document.appendChild(doctype)
end
def insert_comment(data, parent=nil)
parent = @open_elements[-1] if parent.nil?
parent.appendChild(@commentClass.new(data))
end
# Create an element but don't insert it anywhere
def createElement(name, attributes)
element = @elementClass.new(name)
element.attributes = attributes
return element
end
# Switch the function used to insert an element from the
# normal one to the misnested table one and back again
def insert_from_table=(value)
@insert_from_table = value
@insert_element = value ? :insert_elementTable : :insert_elementNormal
end
def insert_element(name, attributes)
send(@insert_element, name, attributes)
end
def insert_elementNormal(name, attributes)
element = @elementClass.new(name)
element.attributes = attributes
@open_elements.last.appendChild(element)
@open_elements.push(element)
return element
end
# Create an element and insert it into the tree
def insert_elementTable(name, attributes)
element = @elementClass.new(name)
element.attributes = attributes
if TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements.last.name)
#We should be in the InTable mode. This means we want to do
#special magic element rearranging
parent, insertBefore = getTableMisnestedNodePosition
if insertBefore.nil?
parent.appendChild(element)
else
parent.insertBefore(element, insertBefore)
end
@open_elements.push(element)
else
return insert_elementNormal(name, attributes)
end
return element
end
def insertText(data, parent=nil)
parent = @open_elements[-1] if parent.nil?
if (not(@insert_from_table) or (@insert_from_table and not TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements[-1].name)))
parent.insertText(data)
else
#We should be in the InTable mode. This means we want to do
#special magic element rearranging
parent, insertBefore = getTableMisnestedNodePosition
parent.insertText(data, insertBefore)
end
end
# Get the foster parent element, and sibling to insert before
# (or nil) when inserting a misnested table node
def getTableMisnestedNodePosition
#The foster parent element is the one which comes before the most
#recently opened table element
#XXX - this is really inelegant
lastTable = nil
fosterParent = nil
insertBefore = nil
@open_elements.reverse.each do |element|
if element.name == "table"
lastTable = element
break
end
end
if lastTable
#XXX - we should really check that this parent is actually a
#node here
if lastTable.parent
fosterParent = lastTable.parent
insertBefore = lastTable
else
fosterParent = @open_elements[@open_elements.index(lastTable) - 1]
end
else
fosterParent = @open_elements[0]
end
return fosterParent, insertBefore
end
def generateImpliedEndTags(exclude=nil)
name = @open_elements[-1].name
# XXX td, th and tr are not actually needed
if (%w[dd dt li p td th tr].include?(name) and name != exclude)
@open_elements.pop
# XXX This is not entirely what the specification says. We should
# investigate it more closely.
generateImpliedEndTags(exclude)
end
end
def get_document
@document
end
def get_fragment
#assert @inner_html
fragment = @fragmentClass.new
@open_elements[0].reparentChildren(fragment)
return fragment
end
# Serialize the subtree of node in the format required by unit tests
# node - the node from which to start serializing
def testSerializer(node)
raise NotImplementedError
end
end
end
end
end

View file

@ -0,0 +1,231 @@
require 'html5/treebuilders/base'
require 'rubygems'
require 'hpricot'
require 'forwardable'
module HTML5
module TreeBuilders
module Hpricot
class Node < Base::Node
extend Forwardable
def_delegators :@hpricot, :name
attr_accessor :hpricot
def initialize(name)
super(name)
@hpricot = self.class.hpricot_class.new name
end
def appendChild(node)
if node.kind_of?(TextNode) and childNodes.any? and childNodes.last.kind_of?(TextNode)
childNodes.last.hpricot.content = childNodes.last.hpricot.content + node.hpricot.content
else
childNodes << node
hpricot.children << node.hpricot
end
if (oldparent = node.hpricot.parent) != nil
oldparent.children.delete_at(oldparent.children.index(node.hpricot))
end
node.hpricot.parent = hpricot
node.parent = self
end
def removeChild(node)
childNodes.delete(node)
hpricot.children.delete_at(hpricot.children.index(node.hpricot))
node.hpricot.parent = nil
node.parent = nil
end
def insertText(data, before=nil)
if before
insertBefore(TextNode.new(data), before)
else
appendChild(TextNode.new(data))
end
end
def insertBefore(node, refNode)
index = childNodes.index(refNode)
if node.kind_of?(TextNode) and index > 0 and childNodes[index-1].kind_of?(TextNode)
childNodes[index-1].hpricot.content = childNodes[index-1].hpricot.to_s + node.hpricot.to_s
else
refNode.hpricot.parent.insert_before(node.hpricot,refNode.hpricot)
childNodes.insert(index, node)
end
end
def hasContent
childNodes.any?
end
end
class Element < Node
def self.hpricot_class
::Hpricot::Elem
end
def initialize(name)
super(name)
@hpricot = ::Hpricot::Elem.new(::Hpricot::STag.new(name))
end
def name
@hpricot.stag.name
end
def cloneNode
attributes.inject(self.class.new(name)) do |node, (name, value)|
node.hpricot[name] = value
node
end
end
# A call to Hpricot::Elem#raw_attributes is built dynamically,
# so alterations to the returned value (a hash) will be lost.
#
# AttributeProxy works around this by forwarding :[]= calls
# to the raw_attributes accessor on the element start tag.
#
class AttributeProxy
def initialize(hpricot)
@hpricot = hpricot
end
def []=(k, v)
@hpricot.stag.send(stag_attributes_method)[k] = v
end
def stag_attributes_method
# STag#attributes changed to STag#raw_attributes after Hpricot 0.5
@hpricot.stag.respond_to?(:raw_attributes) ? :raw_attributes : :attributes
end
def method_missing(*a, &b)
@hpricot.attributes.send(*a, &b)
end
end
def attributes
AttributeProxy.new(@hpricot)
end
def attributes=(attrs)
attrs.each { |name, value| @hpricot[name] = value }
end
def printTree(indent=0)
tree = "\n|#{' ' * indent}<#{name}>"
indent += 2
attributes.each do |name, value|
next if name == 'xmlns'
tree += "\n|#{' ' * indent}#{name}=\"#{value}\""
end
childNodes.inject(tree) { |tree, child| tree + child.printTree(indent) }
end
end
class Document < Node
def self.hpricot_class
::Hpricot::Doc
end
def initialize
super(nil)
end
def printTree(indent=0)
childNodes.inject('#document') { |tree, child| tree + child.printTree(indent + 2) }
end
end
class DocumentType < Node
def_delegators :@hpricot, :public_id, :system_id
def self.hpricot_class
::Hpricot::DocType
end
def initialize(name, public_id, system_id)
begin
super(name)
rescue ArgumentError # needs 3...
end
@hpricot = ::Hpricot::DocType.new(name, public_id, system_id)
end
def printTree(indent=0)
if hpricot.target and hpricot.target.any?
"\n|#{' ' * indent}<!DOCTYPE #{hpricot.target}>"
else
"\n|#{' ' * indent}<!DOCTYPE >"
end
end
end
class DocumentFragment < Element
def initialize
super('')
end
def printTree(indent=0)
childNodes.inject('') {|tree, child| tree + child.printTree(indent + 2) }
end
end
class TextNode < Node
def initialize(data)
@hpricot = ::Hpricot::Text.new(data)
end
def printTree(indent=0)
"\n|#{' ' * indent}\"#{hpricot.content}\""
end
end
class CommentNode < Node
def self.hpricot_class
::Hpricot::Comment
end
def printTree(indent=0)
"\n|#{' ' * indent}<!-- #{hpricot.content} -->"
end
end
class TreeBuilder < Base::TreeBuilder
def initialize
@documentClass = Document
@doctypeClass = DocumentType
@elementClass = Element
@commentClass = CommentNode
@fragmentClass = DocumentFragment
end
def insertDoctype(name, public_id, system_id)
doctype = @doctypeClass.new(name, public_id, system_id)
@document.appendChild(doctype)
end
def testSerializer(node)
node.printTree
end
def get_document
@document.hpricot
end
def get_fragment
@document = super
return @document.hpricot.children
end
end
end
end
end

View file

@ -0,0 +1,209 @@
require 'html5/treebuilders/base'
require 'rexml/document'
require 'forwardable'
module HTML5
module TreeBuilders
module REXML
class Node < Base::Node
extend Forwardable
def_delegators :@rxobj, :name, :attributes
attr_accessor :rxobj
def initialize name
super name
@rxobj = self.class.rxclass.new name
end
def appendChild node
if node.kind_of?(TextNode) && childNodes.length > 0 && childNodes.last.kind_of?(TextNode)
childNodes.last.rxobj.value = childNodes.last.rxobj.to_s + node.rxobj.to_s
childNodes.last.rxobj.raw = true
else
childNodes.push node
rxobj.add node.rxobj
end
node.parent = self
end
def removeChild node
childNodes.delete node
rxobj.delete node.rxobj
node.parent = nil
end
def insertText data, before=nil
if before
insertBefore TextNode.new(data), before
else
appendChild TextNode.new(data)
end
end
def insertBefore node, refNode
index = childNodes.index(refNode)
if node.kind_of?(TextNode) and index > 0 && childNodes[index-1].kind_of?(TextNode)
childNodes[index-1].rxobj.value = childNodes[index-1].rxobj.to_s + node.rxobj.to_s
childNodes[index-1].rxobj.raw = true
else
childNodes.insert index, node
refNode.rxobj.parent.insert_before(refNode.rxobj,node.rxobj)
end
end
def hasContent
(childNodes.length > 0)
end
end
class Element < Node
def self.rxclass
::REXML::Element
end
def initialize name
super name
end
def cloneNode
newNode = self.class.new name
attributes.each {|name,value| newNode.attributes[name] = value}
newNode
end
def attributes= value
value.each {|name, value| rxobj.attributes[name] = value}
end
def printTree indent=0
tree = "\n|#{' ' * indent}<#{name}>"
indent += 2
for name, value in attributes
next if name == 'xmlns'
tree += "\n|#{' ' * indent}#{name}=\"#{value}\""
end
for child in childNodes
tree += child.printTree(indent)
end
tree
end
end
class Document < Node
def self.rxclass
::REXML::Document
end
def initialize
super nil
end
# ryansking: not sure why this was here. removing it doesn't cause any tests to fail
# def appendChild node
# if node.kind_of? Element and node.name == 'html'
# node.rxobj.add_namespace('http://www.w3.org/1999/xhtml')
# end
# super node
# end
def printTree indent=0
tree = "#document"
for child in childNodes
tree += child.printTree(indent + 2)
end
return tree
end
end
class DocumentType < Node
def_delegator :@rxobj, :public, :public_id
def_delegator :@rxobj, :system, :system_id
def self.rxclass
::REXML::DocType
end
def initialize name, public_id, system_id
super(name)
if public_id
@rxobj = ::REXML::DocType.new [name, ::REXML::DocType::PUBLIC, public_id, system_id]
elsif system_id
@rxobj = ::REXML::DocType.new [name, ::REXML::DocType::SYSTEM, nil, system_id]
else
@rxobj = ::REXML::DocType.new name
end
end
def printTree indent=0
"\n|#{' ' * indent}<!DOCTYPE #{name}>"
end
end
class DocumentFragment < Element
def initialize
super nil
end
def printTree indent=0
tree = ""
for child in childNodes
tree += child.printTree(indent+2)
end
return tree
end
end
class TextNode < Node
def initialize data
raw = data.gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
@rxobj = ::REXML::Text.new(raw, true, nil, true)
end
def printTree indent=0
"\n|#{' ' * indent}\"#{rxobj.value}\""
end
end
class CommentNode < Node
def self.rxclass
::REXML::Comment
end
def printTree indent=0
"\n|#{' ' * indent}<!-- #{rxobj.string} -->"
end
end
class TreeBuilder < Base::TreeBuilder
def initialize
@documentClass = Document
@doctypeClass = DocumentType
@elementClass = Element
@commentClass = CommentNode
@fragmentClass = DocumentFragment
end
def insertDoctype(name, public_id, system_id)
doctype = @doctypeClass.new(name, public_id, system_id)
@document.appendChild(doctype)
end
def testSerializer node
node.printTree
end
def get_document
@document.rxobj
end
def get_fragment
@document = super
return @document.rxobj.children
end
end
end
end
end

View file

@ -0,0 +1,185 @@
require 'html5/treebuilders/base'
module HTML5
module TreeBuilders
module SimpleTree
class Node < Base::Node
# Node representing an item in the tree.
# name - The tag name associated with the node
attr_accessor :name
# The value of the current node (applies to text nodes and
# comments
attr_accessor :value
# a dict holding name, value pairs for attributes of the node
attr_accessor :attributes
def initialize name
super
@name = name
@value = nil
@attributes = {}
end
def appendChild node
if node.kind_of? TextNode and
childNodes.length > 0 and childNodes.last.kind_of? TextNode
childNodes.last.value += node.value
else
childNodes << node
end
node.parent = self
end
def removeChild node
childNodes.delete node
node.parent = nil
end
def cloneNode
newNode = self.class.new name
attributes.each {|name,value| newNode.attributes[name] = value}
newNode.value = value
newNode
end
def insertText data, before=nil
if before
insertBefore TextNode.new(data), before
else
appendChild TextNode.new(data)
end
end
def insertBefore node, refNode
index = childNodes.index(refNode)
if node.kind_of?(TextNode) && index > 0 && childNodes[index-1].kind_of?(TextNode)
childNodes[index-1].value += node.value
else
childNodes.insert index, node
end
end
def printTree indent=0
tree = "\n|%s%s" % [' '* indent, self.to_s]
for child in childNodes
tree += child.printTree(indent + 2)
end
return tree
end
def hasContent
childNodes.length > 0
end
end
class Element < Node
def to_s
"<#{name}>"
end
def printTree indent=0
tree = "\n|%s%s" % [' '* indent, self.to_s]
indent += 2
for name, value in attributes
tree += "\n|%s%s=\"%s\"" % [' ' * indent, name, value]
end
for child in childNodes
tree += child.printTree(indent)
end
tree
end
end
class Document < Node
def to_s
"#document"
end
def initialize
super nil
end
def printTree indent=0
tree = to_s
for child in childNodes
tree += child.printTree(indent + 2)
end
tree
end
end
class DocumentType < Node
attr_accessor :public_id, :system_id
def to_s
"<!DOCTYPE #{name}>"
end
def initialize name
super name
@public_id = nil
@system_id = nil
end
end
class DocumentFragment < Element
def initialize
super nil
end
def printTree indent=0
tree = ""
for child in childNodes
tree += child.printTree(indent+2)
end
return tree
end
end
class TextNode < Node
def initialize value
super nil
@value = value
end
def to_s
'"%s"' % value
end
end
class CommentNode < Node
def initialize value
super nil
@value = value
end
def to_s
"<!-- %s -->" % value
end
end
class TreeBuilder < Base::TreeBuilder
def initialize
@documentClass = Document
@doctypeClass = DocumentType
@elementClass = Element
@commentClass = CommentNode
@fragmentClass = DocumentFragment
end
def testSerializer node
node.printTree
end
def get_fragment
@document = super
@document
end
end
end
end
end