Minor S5 tweaks and Sync with Latest HTML5lib

This commit is contained in:
Jacques Distler 2007-08-30 12:19:10 -05:00
parent dbed460843
commit 81d3cdc8e4
81 changed files with 9887 additions and 1687 deletions

View file

@ -3,153 +3,151 @@ module HTML5
module TreeWalkers
module TokenConstructor
def error(msg)
return {:type => "SerializeError", :data => msg}
def error(msg)
{:type => "SerializeError", :data => msg}
end
def normalize_attrs(attrs)
attrs.to_a
end
def empty_tag(name, attrs, has_children=false)
error(_("Void element has children")) if has_children
{:type => :EmptyTag, :name => name, :data => normalize_attrs(attrs)}
end
def start_tag(name, attrs)
{:type => :StartTag, :name => name, :data => normalize_attrs(attrs)}
end
def end_tag(name)
{:type => :EndTag, :name => name, :data => []}
end
def text(data)
if data =~ /\A([#{SPACE_CHARACTERS.join('')}]+)/m
yield({:type => :SpaceCharacters, :data => $1})
data = data[$1.length .. -1]
return if data.empty?
end
def normalizeAttrs(attrs)
attrs.to_a
if data =~ /([#{SPACE_CHARACTERS.join('')}]+)\Z/m
yield({:type => :Characters, :data => data[0 ... -$1.length]})
yield({:type => :SpaceCharacters, :data => $1})
else
yield({:type => :Characters, :data => data})
end
end
def emptyTag(name, attrs, hasChildren=false)
error(_("Void element has children")) if hasChildren
return({:type => :EmptyTag, :name => name, \
:data => normalizeAttrs(attrs)})
end
def comment(data)
{:type => :Comment, :data => data}
end
def startTag(name, attrs)
return {:type => :StartTag, :name => name, \
:data => normalizeAttrs(attrs)}
end
def doctype(name, public_id, system_id, correct=nil)
{:type => :Doctype, :name => name, :public_id => public_id, :system_id => system_id, :correct => correct}
end
def endTag(name)
return {:type => :EndTag, :name => name, :data => []}
end
def unknown(nodeType)
error(_("Unknown node type: ") + nodeType.to_s)
end
def text(data)
if data =~ /\A([#{SPACE_CHARACTERS.join('')}]+)/m
yield({:type => :SpaceCharacters, :data => $1})
data = data[$1.length .. -1]
return if data.empty?
end
if data =~ /([#{SPACE_CHARACTERS.join('')}]+)\Z/m
yield({:type => :Characters, :data => data[0 ... -$1.length]})
yield({:type => :SpaceCharacters, :data => $1})
else
yield({:type => :Characters, :data => data})
end
end
def comment(data)
return {:type => :Comment, :data => data}
end
def doctype(name)
return {:type => :Doctype, :name => name, :data => name.upcase() == "HTML"}
end
def unknown(nodeType)
return error(_("Unknown node type: ") + nodeType.to_s)
end
def _(str)
str
end
def _(str)
str
end
end
class Base
include TokenConstructor
def initialize(tree)
@tree = tree
@tree = tree
end
def each
raise NotImplementedError
raise NotImplementedError
end
alias walk each
end
class NonRecursiveTreeWalker < TreeWalkers::Base
def node_details(node)
raise NotImplementedError
end
def node_details(node)
raise NotImplementedError
end
def first_child(node)
raise NotImplementedError
end
def first_child(node)
raise NotImplementedError
end
def next_sibling(node)
raise NotImplementedError
end
def next_sibling(node)
raise NotImplementedError
end
def parent(node)
raise NotImplementedError
end
def parent(node)
raise NotImplementedError
end
def each
currentNode = @tree
while currentNode != nil
details = node_details(currentNode)
hasChildren = false
def each
current_node = @tree
while current_node != nil
details = node_details(current_node)
has_children = false
case details.shift
when :DOCTYPE
yield doctype(*details)
case details.shift
when :DOCTYPE
yield doctype(*details)
when :TEXT
text(*details) {|token| yield token}
when :TEXT
text(*details) {|token| yield token}
when :ELEMENT
name, attributes, hasChildren = details
if VOID_ELEMENTS.include?(name)
yield emptyTag(name, attributes.to_a, hasChildren)
hasChildren = false
else
yield startTag(name, attributes.to_a)
end
when :COMMENT
yield comment(details[0])
when :DOCUMENT, :DOCUMENT_FRAGMENT
hasChildren = true
when nil
# ignore (REXML::XMLDecl is an example)
else
yield unknown(details[0])
end
firstChild = hasChildren ? first_child(currentNode) : nil
if firstChild != nil
currentNode = firstChild
else
while currentNode != nil
details = node_details(currentNode)
if details.shift == :ELEMENT
name, attributes, hasChildren = details
yield endTag(name) if !VOID_ELEMENTS.include?(name)
end
if @tree == currentNode
currentNode = nil
else
nextSibling = next_sibling(currentNode)
if nextSibling != nil
currentNode = nextSibling
break
end
currentNode = parent(currentNode)
end
end
end
when :ELEMENT
name, attributes, has_children = details
if VOID_ELEMENTS.include?(name)
yield empty_tag(name, attributes.to_a, has_children)
has_children = false
else
yield start_tag(name, attributes.to_a)
end
when :COMMENT
yield comment(details[0])
when :DOCUMENT, :DOCUMENT_FRAGMENT
has_children = true
when nil
# ignore (REXML::XMLDecl is an example)
else
yield unknown(details[0])
end
first_child = has_children ? first_child(current_node) : nil
if first_child != nil
current_node = first_child
else
while current_node != nil
details = node_details(current_node)
if details.shift == :ELEMENT
name, attributes, has_children = details
yield end_tag(name) if !VOID_ELEMENTS.include?(name)
end
if @tree == current_node
current_node = nil
else
next_sibling = next_sibling(current_node)
if next_sibling != nil
current_node = next_sibling
break
end
current_node = parent(current_node)
end
end
end
end
end
end
end

View file

@ -13,17 +13,17 @@ module HTML5
[:DOCUMENT_FRAGMENT]
else
[:ELEMENT, node.name,
node.attributes.map {|name,value| [name,value]},
node.attributes.map {|name, value| [name, value]},
!node.empty?]
end
when ::Hpricot::Text
[:TEXT, node.to_plain_text]
[:TEXT, node.content]
when ::Hpricot::Comment
[:COMMENT, node.content]
when ::Hpricot::Doc
[:DOCUMENT]
when ::Hpricot::DocType
[:DOCTYPE, node.target]
[:DOCTYPE, node.target, node.public_id, node.system_id]
when ::Hpricot::XMLDecl
[nil]
else

View file

@ -23,7 +23,7 @@ module HTML5
when ::REXML::Comment
[:COMMENT, node.string]
when ::REXML::DocType
[:DOCTYPE, node.name]
[:DOCTYPE, node.name, node.public, node.system]
when ::REXML::XMLDecl
[nil]
else

View file

@ -12,20 +12,20 @@ module HTML5
return
when DocumentType
yield doctype(node.name)
yield doctype(node.name, node.public_id, node.system_id)
when TextNode
text(node.value) {|token| yield token}
when Element
if VOID_ELEMENTS.include?(node.name)
yield emptyTag(node.name, node.attributes, node.hasContent())
yield empty_tag(node.name, node.attributes, node.hasContent())
else
yield startTag(node.name, node.attributes)
yield start_tag(node.name, node.attributes)
for child in node.childNodes
walk(child) {|token| yield token}
end
yield endTag(node.name)
yield end_tag(node.name)
end
when CommentNode