Sync with latest HTML5lib and latest Maruku

This commit is contained in:
Jacques Distler 2007-07-04 17:36:59 -05:00
parent 8e92e4a3ab
commit 8ccaad85a5
71 changed files with 1974 additions and 1621 deletions

View file

@ -0,0 +1,330 @@
require 'html5/constants'
#XXX - TODO; make the default interface more ElementTree-like rather than DOM-like
module HTML5
# The scope markers are inserted when entering buttons, object elements,
# marquees, table cells, and table captions, and are used to prevent formatting
# from "leaking" into tables, buttons, object elements, and marquees.
Marker = nil
module TreeBuilders
module Base
class Node
# The parent of the current node (or nil for the document node)
attr_accessor :parent
# a list of child nodes of the current node. This must
# include all elements but not necessarily other node types
attr_accessor :childNodes
# A list of miscellaneous flags that can be set on the node
attr_accessor :_flags
def initialize(name)
@parent = nil
@childNodes = []
@_flags = []
end
# Insert node as a child of the current node
def appendChild(node)
raise NotImplementedError
end
# Insert data as text in the current node, positioned before the
# start of node insertBefore or to the end of the node's text.
def insertText(data, insertBefore=nil)
raise NotImplementedError
end
# Insert node as a child of the current node, before refNode in the
# list of child nodes. Raises ValueError if refNode is not a child of
# the current node
def insertBefore(node, refNode)
raise NotImplementedError
end
# Remove node from the children of the current node
def removeChild(node)
raise NotImplementedError
end
# Move all the children of the current node to newParent.
# This is needed so that trees that don't store text as nodes move the
# text in the correct way
def reparentChildren(newParent)
#XXX - should this method be made more general?
@childNodes.each { |child| newParent.appendChild(child) }
@childNodes = []
end
# Return a shallow copy of the current node i.e. a node with the same
# name and attributes but with no parent or child nodes
def cloneNode
raise NotImplementedError
end
# Return true if the node has children or text, false otherwise
def hasContent
raise NotImplementedError
end
end
# Base treebuilder implementation
class TreeBuilder
attr_accessor :openElements
attr_accessor :activeFormattingElements
attr_accessor :document
attr_accessor :headPointer
attr_accessor :formPointer
# Class to use for document root
documentClass = nil
# Class to use for HTML elements
elementClass = nil
# Class to use for comments
commentClass = nil
# Class to use for doctypes
doctypeClass = nil
# Fragment class
fragmentClass = nil
def initialize
reset
end
def reset
@openElements = []
@activeFormattingElements = []
#XXX - rename these to headElement, formElement
@headPointer = nil
@formPointer = nil
self.insertFromTable = false
@document = @documentClass.new
end
def elementInScope(target, tableVariant=false)
# Exit early when possible.
return true if @openElements[-1].name == target
# AT How about while true and simply set node to [-1] and set it to
# [-2] at the end...
@openElements.reverse.each do |element|
if element.name == target
return true
elsif element.name == 'table'
return false
elsif not tableVariant and SCOPING_ELEMENTS.include?(element.name)
return false
elsif element.name == 'html'
return false
end
end
assert false # We should never reach this point
end
def reconstructActiveFormattingElements
# Within this algorithm the order of steps described in the
# specification is not quite the same as the order of steps in the
# code. It should still do the same though.
# Step 1: stop the algorithm when there's nothing to do.
return if @activeFormattingElements.empty?
# Step 2 and step 3: we start with the last element. So i is -1.
i = -1
entry = @activeFormattingElements[i]
return if entry == Marker or @openElements.include?(entry)
# Step 6
until entry == Marker or @openElements.include?(entry)
# Step 5: let entry be one earlier in the list.
i -= 1
begin
entry = @activeFormattingElements[i]
rescue
# Step 4: at this point we need to jump to step 8. By not doing
# i += 1 which is also done in step 7 we achieve that.
break
end
end
while true
# Step 7
i += 1
# Step 8
clone = @activeFormattingElements[i].cloneNode
# Step 9
element = insertElement(clone.name, clone.attributes)
# Step 10
@activeFormattingElements[i] = element
# Step 11
break if element == @activeFormattingElements[-1]
end
end
def clearActiveFormattingElements
{} until @activeFormattingElements.empty? || @activeFormattingElements.pop == Marker
end
# Check if an element exists between the end of the active
# formatting elements and the last marker. If it does, return it, else
# return false
def elementInActiveFormattingElements(name)
@activeFormattingElements.reverse.each do |element|
# Check for Marker first because if it's a Marker it doesn't have a
# name attribute.
break if element == Marker
return element if element.name == name
end
return false
end
def insertDoctype(name)
@document.appendChild(@doctypeClass.new(name))
end
def insertComment(data, parent=nil)
parent = @openElements[-1] if parent.nil?
parent.appendChild(@commentClass.new(data))
end
# Create an element but don't insert it anywhere
def createElement(name, attributes)
element = @elementClass.new(name)
element.attributes = attributes
return element
end
# Switch the function used to insert an element from the
# normal one to the misnested table one and back again
def insertFromTable=(value)
@insertFromTable = value
@insertElement = value ? :insertElementTable : :insertElementNormal
end
def insertElement(name, attributes)
send(@insertElement, name, attributes)
end
def insertElementNormal(name, attributes)
element = @elementClass.new(name)
element.attributes = attributes
@openElements[-1].appendChild(element)
@openElements.push(element)
return element
end
# Create an element and insert it into the tree
def insertElementTable(name, attributes)
element = @elementClass.new(name)
element.attributes = attributes
if TABLE_INSERT_MODE_ELEMENTS.include?(@openElements[-1].name)
#We should be in the InTable mode. This means we want to do
#special magic element rearranging
parent, insertBefore = getTableMisnestedNodePosition
if insertBefore.nil?
parent.appendChild(element)
else
parent.insertBefore(element, insertBefore)
end
@openElements.push(element)
else
return insertElementNormal(name, attributes)
end
return element
end
def insertText(data, parent=nil)
parent = @openElements[-1] if parent.nil?
if (not(@insertFromTable) or (@insertFromTable and not TABLE_INSERT_MODE_ELEMENTS.include?(@openElements[-1].name)))
parent.insertText(data)
else
#We should be in the InTable mode. This means we want to do
#special magic element rearranging
parent, insertBefore = getTableMisnestedNodePosition
parent.insertText(data, insertBefore)
end
end
# Get the foster parent element, and sibling to insert before
# (or nil) when inserting a misnested table node
def getTableMisnestedNodePosition
#The foster parent element is the one which comes before the most
#recently opened table element
#XXX - this is really inelegant
lastTable = nil
fosterParent = nil
insertBefore = nil
@openElements.reverse.each do |element|
if element.name == "table"
lastTable = element
break
end
end
if lastTable
#XXX - we should really check that this parent is actually a
#node here
if lastTable.parent
fosterParent = lastTable.parent
insertBefore = lastTable
else
fosterParent = @openElements[@openElements.index(lastTable) - 1]
end
else
fosterParent = @openElements[0]
end
return fosterParent, insertBefore
end
def generateImpliedEndTags(exclude=nil)
name = @openElements[-1].name
if (['dd', 'dt', 'li', 'p', 'td', 'th', 'tr'].include?(name) and name != exclude)
@openElements.pop
# XXX This is not entirely what the specification says. We should
# investigate it more closely.
generateImpliedEndTags(exclude)
end
end
def getDocument
@document
end
def getFragment
#assert @innerHTML
fragment = @fragmentClass.new
@openElements[0].reparentChildren(fragment)
return fragment
end
# Serialize the subtree of node in the format required by unit tests
# node - the node from which to start serializing
def testSerializer(node)
raise NotImplementedError
end
end
end
end
end

View file

@ -0,0 +1,221 @@
require 'html5/treebuilders/base'
require 'rubygems'
require 'hpricot'
require 'forwardable'
module HTML5
module TreeBuilders
module Hpricot
class Node < Base::Node
extend Forwardable
def_delegators :@hpricot, :name
attr_accessor :hpricot
def initialize(name)
super(name)
@hpricot = self.class.hpricot_class.new name
end
def appendChild(node)
if node.kind_of?(TextNode) and childNodes.any? and childNodes.last.kind_of?(TextNode)
childNodes[-1].hpricot.content = childNodes[-1].hpricot.to_s + node.hpricot.to_s
else
childNodes << node
hpricot.children << node.hpricot
end
if (oldparent = node.hpricot.parent) != nil
oldparent.children.delete_at(oldparent.children.index(node.hpricot))
end
node.hpricot.parent = hpricot
node.parent = self
end
def removeChild(node)
childNodes.delete(node)
hpricot.children.delete_at(hpricot.children.index(node.hpricot))
node.hpricot.parent = nil
node.parent = nil
end
def insertText(data, before=nil)
if before
insertBefore(TextNode.new(data), before)
else
appendChild(TextNode.new(data))
end
end
def insertBefore(node, refNode)
index = childNodes.index(refNode)
if node.kind_of?(TextNode) and index > 0 and childNodes[index-1].kind_of?(TextNode)
childNodes[index-1].hpricot.content = childNodes[index-1].hpricot.to_s + node.hpricot.to_s
else
refNode.hpricot.parent.insert_before(node.hpricot,refNode.hpricot)
childNodes.insert(index, node)
end
end
def hasContent
childNodes.any?
end
end
class Element < Node
def self.hpricot_class
::Hpricot::Elem
end
def initialize(name)
super(name)
@hpricot = ::Hpricot::Elem.new(::Hpricot::STag.new(name))
end
def name
@hpricot.stag.name
end
def cloneNode
attributes.inject(self.class.new(name)) do |node, (name, value)|
node.hpricot[name] = value
node
end
end
# A call to Hpricot::Elem#raw_attributes is built dynamically,
# so alterations to the returned value (a hash) will be lost.
#
# AttributeProxy works around this by forwarding :[]= calls
# to the raw_attributes accessor on the element start tag.
#
class AttributeProxy
def initialize(hpricot)
@hpricot = hpricot
end
def []=(k, v)
@hpricot.stag.send(stag_attributes_method)[k] = v
end
def stag_attributes_method
# STag#attributes changed to STag#raw_attributes after Hpricot 0.5
@hpricot.stag.respond_to?(:raw_attributes) ? :raw_attributes : :attributes
end
def method_missing(*a, &b)
@hpricot.attributes.send(*a, &b)
end
end
def attributes
AttributeProxy.new(@hpricot)
end
def attributes=(attrs)
attrs.each { |name, value| @hpricot[name] = value }
end
def printTree(indent=0)
tree = "\n|#{' ' * indent}<#{name}>"
indent += 2
attributes.each do |name, value|
next if name == 'xmlns'
tree += "\n|#{' ' * indent}#{name}=\"#{value}\""
end
childNodes.inject(tree) { |tree, child| tree + child.printTree(indent) }
end
end
class Document < Node
def self.hpricot_class
::Hpricot::Doc
end
def initialize
super(nil)
end
def printTree(indent=0)
childNodes.inject('#document') { |tree, child| tree + child.printTree(indent + 2) }
end
end
class DocumentType < Node
def self.hpricot_class
::Hpricot::DocType
end
def initialize(name)
begin
super(name)
rescue ArgumentError # needs 3...
end
@hpricot = ::Hpricot::DocType.new(name, nil, nil)
end
def printTree(indent=0)
"\n|#{' ' * indent}<!DOCTYPE #{hpricot.target}>"
end
end
class DocumentFragment < Element
def initialize
super('')
end
def printTree(indent=0)
childNodes.inject('') { |tree, child| tree + child.printTree(indent+2) }
end
end
class TextNode < Node
def initialize(data)
@hpricot = ::Hpricot::Text.new(data)
end
def printTree(indent=0)
"\n|#{' ' * indent}\"#{hpricot.content}\""
end
end
class CommentNode < Node
def self.hpricot_class
::Hpricot::Comment
end
def printTree(indent=0)
"\n|#{' ' * indent}<!-- #{hpricot.content} -->"
end
end
class TreeBuilder < Base::TreeBuilder
def initialize
@documentClass = Document
@doctypeClass = DocumentType
@elementClass = Element
@commentClass = CommentNode
@fragmentClass = DocumentFragment
end
def testSerializer(node)
node.printTree
end
def getDocument
@document.hpricot
end
def getFragment
@document = super
return @document.hpricot.children
end
end
end
end
end

View file

@ -0,0 +1,192 @@
require 'html5/treebuilders/base'
require 'rexml/document'
require 'forwardable'
module HTML5
module TreeBuilders
module REXML
class Node < Base::Node
extend Forwardable
def_delegators :@rxobj, :name, :attributes
attr_accessor :rxobj
def initialize name
super name
@rxobj = self.class.rxclass.new name
end
def appendChild node
if node.kind_of? TextNode and
childNodes.length>0 and childNodes[-1].kind_of? TextNode
childNodes[-1].rxobj.value =
childNodes[-1].rxobj.to_s + node.rxobj.to_s
childNodes[-1].rxobj.raw = true
else
childNodes.push node
rxobj.add node.rxobj
end
node.parent = self
end
def removeChild node
childNodes.delete node
rxobj.delete node.rxobj
node.parent = nil
end
def insertText data, before=nil
if before
insertBefore TextNode.new(data), before
else
appendChild TextNode.new(data)
end
end
def insertBefore node, refNode
index = childNodes.index(refNode)
if node.kind_of? TextNode and index>0 and
childNodes[index-1].kind_of? TextNode
childNodes[index-1].rxobj.value =
childNodes[index-1].rxobj.to_s + node.rxobj.to_s
childNodes[index-1].rxobj.raw = true
else
childNodes.insert index, node
refNode.rxobj.parent.insert_before(refNode.rxobj,node.rxobj)
end
end
def hasContent
return (childNodes.length > 0)
end
end
class Element < Node
def self.rxclass
::REXML::Element
end
def initialize name
super name
end
def cloneNode
newNode = self.class.new name
attributes.each {|name,value| newNode.attributes[name] = value}
newNode
end
def attributes= value
value.each {|name, value| rxobj.attributes[name]=value}
end
def printTree indent=0
tree = "\n|#{' ' * indent}<#{name}>"
indent += 2
for name, value in attributes
next if name == 'xmlns'
tree += "\n|#{' ' * indent}#{name}=\"#{value}\""
end
for child in childNodes
tree += child.printTree(indent)
end
return tree
end
end
class Document < Node
def self.rxclass
::REXML::Document
end
def initialize
super nil
end
def appendChild node
if node.kind_of? Element and node.name == 'html'
node.rxobj.add_namespace('http://www.w3.org/1999/xhtml')
end
super node
end
def printTree indent=0
tree = "#document"
for child in childNodes
tree += child.printTree(indent + 2)
end
return tree
end
end
class DocumentType < Node
def self.rxclass
::REXML::DocType
end
def printTree indent=0
"\n|#{' ' * indent}<!DOCTYPE #{name}>"
end
end
class DocumentFragment < Element
def initialize
super nil
end
def printTree indent=0
tree = ""
for child in childNodes
tree += child.printTree(indent+2)
end
return tree
end
end
class TextNode < Node
def initialize data
raw=data.gsub('&','&amp;').gsub('<','&lt;').gsub('>','&gt;')
@rxobj = ::REXML::Text.new(raw, true, nil, true)
end
def printTree indent=0
"\n|#{' ' * indent}\"#{rxobj.value}\""
end
end
class CommentNode < Node
def self.rxclass
::REXML::Comment
end
def printTree indent=0
"\n|#{' ' * indent}<!-- #{rxobj.string} -->"
end
end
class TreeBuilder < Base::TreeBuilder
def initialize
@documentClass = Document
@doctypeClass = DocumentType
@elementClass = Element
@commentClass = CommentNode
@fragmentClass = DocumentFragment
end
def testSerializer node
node.printTree()
end
def getDocument
@document.rxobj
end
def getFragment
@document = super
return @document.rxobj.children
end
end
end
end
end

View file

@ -0,0 +1,178 @@
require 'html5/treebuilders/base'
module HTML5
module TreeBuilders
module SimpleTree
class Node < Base::Node
# Node representing an item in the tree.
# name - The tag name associated with the node
attr_accessor :name
# The value of the current node (applies to text nodes and
# comments
attr_accessor :value
# a dict holding name, value pairs for attributes of the node
attr_accessor :attributes
def initialize name
super
@name = name
@value = nil
@attributes = {}
end
def appendChild node
if node.kind_of? TextNode and
childNodes.length>0 and childNodes[-1].kind_of? TextNode
childNodes[-1].value += node.value
else
childNodes.push node
end
node.parent = self
end
def removeChild node
childNodes.delete node
node.parent = nil
end
def cloneNode
newNode = self.class.new name
attributes.each {|name,value| newNode.attributes[name] = value}
newNode.value = value
newNode
end
def insertText data, before=nil
if before
insertBefore TextNode.new(data), before
else
appendChild TextNode.new(data)
end
end
def insertBefore node, refNode
index = childNodes.index(refNode)
if node.kind_of? TextNode and index>0 and
childNodes[index-1].kind_of? TextNode
childNodes[index-1].value += node.value
else
childNodes.insert index, node
end
end
def printTree indent=0
tree = "\n|%s%s" % [' '* indent, self.to_s]
for child in childNodes
tree += child.printTree(indent + 2)
end
return tree
end
def hasContent
return (childNodes.length > 0)
end
end
class Element < Node
def to_s
"<#{name}>"
end
def printTree indent=0
tree = "\n|%s%s" % [' '* indent, self.to_s]
indent += 2
for name, value in attributes
tree += "\n|%s%s=\"%s\"" % [' ' * indent, name, value]
end
for child in childNodes
tree += child.printTree(indent)
end
return tree
end
end
class Document < Node
def to_s
"#document"
end
def initialize
super nil
end
def printTree indent=0
tree = to_s
for child in childNodes
tree += child.printTree(indent + 2)
end
return tree
end
end
class DocumentType < Node
def to_s
"<!DOCTYPE %s>" % name
end
end
class DocumentFragment < Element
def initialize
super nil
end
def printTree indent=0
tree = ""
for child in childNodes
tree += child.printTree(indent+2)
end
return tree
end
end
class TextNode < Node
def initialize value
super nil
@value = value
end
def to_s
'"%s"' % value
end
end
class CommentNode < Node
def initialize value
super nil
@value = value
end
def to_s
"<!-- %s -->" % value
end
end
class TreeBuilder < Base::TreeBuilder
def initialize
@documentClass = Document
@doctypeClass = DocumentType
@elementClass = Element
@commentClass = CommentNode
@fragmentClass = DocumentFragment
end
def testSerializer node
node.printTree()
end
def getFragment
@document = super
return @document.childNodes
end
end
end
end
end