HTML5lib is Back.
Synced with the latest version of HTML5lib, which fixes a problem with astral-plane characters. I should really run some timing tests, but the HTML5lib sanitizer seems to be 2-5 times slower than the old sanitizer.
This commit is contained in:
parent
e1a6827f1f
commit
4dd70af5ae
39 changed files with 4843 additions and 5576 deletions
530
lib/node.rb
530
lib/node.rb
|
@ -1,530 +0,0 @@
|
||||||
require 'strscan'
|
|
||||||
|
|
||||||
module XHTML #:nodoc:
|
|
||||||
|
|
||||||
class Conditions < Hash #:nodoc:
|
|
||||||
def initialize(hash)
|
|
||||||
super()
|
|
||||||
hash = { :content => hash } unless Hash === hash
|
|
||||||
hash = keys_to_symbols(hash)
|
|
||||||
hash.each do |k,v|
|
|
||||||
case k
|
|
||||||
when :tag, :content then
|
|
||||||
# keys are valid, and require no further processing
|
|
||||||
when :attributes then
|
|
||||||
hash[k] = keys_to_strings(v)
|
|
||||||
when :parent, :child, :ancestor, :descendant, :sibling, :before,
|
|
||||||
:after
|
|
||||||
hash[k] = Conditions.new(v)
|
|
||||||
when :children
|
|
||||||
hash[k] = v = keys_to_symbols(v)
|
|
||||||
v.each do |k,v2|
|
|
||||||
case k
|
|
||||||
when :count, :greater_than, :less_than
|
|
||||||
# keys are valid, and require no further processing
|
|
||||||
when :only
|
|
||||||
v[k] = Conditions.new(v2)
|
|
||||||
else
|
|
||||||
raise "illegal key #{k.inspect} => #{v2.inspect}"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
else
|
|
||||||
raise "illegal key #{k.inspect} => #{v.inspect}"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
update hash
|
|
||||||
end
|
|
||||||
|
|
||||||
private
|
|
||||||
|
|
||||||
def keys_to_strings(hash)
|
|
||||||
hash.keys.inject({}) do |h,k|
|
|
||||||
h[k.to_s] = hash[k]
|
|
||||||
h
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def keys_to_symbols(hash)
|
|
||||||
hash.keys.inject({}) do |h,k|
|
|
||||||
raise "illegal key #{k.inspect}" unless k.respond_to?(:to_sym)
|
|
||||||
h[k.to_sym] = hash[k]
|
|
||||||
h
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# The base class of all nodes, textual and otherwise, in an HTML document.
|
|
||||||
class Node #:nodoc:
|
|
||||||
# The array of children of this node. Not all nodes have children.
|
|
||||||
attr_reader :children
|
|
||||||
|
|
||||||
# The parent node of this node. All nodes have a parent, except for the
|
|
||||||
# root node.
|
|
||||||
attr_reader :parent
|
|
||||||
|
|
||||||
# The line number of the input where this node was begun
|
|
||||||
attr_reader :line
|
|
||||||
|
|
||||||
# The byte position in the input where this node was begun
|
|
||||||
attr_reader :position
|
|
||||||
|
|
||||||
# Create a new node as a child of the given parent.
|
|
||||||
def initialize(parent, line=0, pos=0)
|
|
||||||
@parent = parent
|
|
||||||
@children = []
|
|
||||||
@line, @position = line, pos
|
|
||||||
end
|
|
||||||
|
|
||||||
# Return a textual representation of the node.
|
|
||||||
def to_s
|
|
||||||
s = ""
|
|
||||||
@children.each { |child| s << child.to_s }
|
|
||||||
s
|
|
||||||
end
|
|
||||||
|
|
||||||
# Return false (subclasses must override this to provide specific matching
|
|
||||||
# behavior.) +conditions+ may be of any type.
|
|
||||||
def match(conditions)
|
|
||||||
false
|
|
||||||
end
|
|
||||||
|
|
||||||
# Search the children of this node for the first node for which #find
|
|
||||||
# returns non +nil+. Returns the result of the #find call that succeeded.
|
|
||||||
def find(conditions)
|
|
||||||
conditions = validate_conditions(conditions)
|
|
||||||
@children.each do |child|
|
|
||||||
node = child.find(conditions)
|
|
||||||
return node if node
|
|
||||||
end
|
|
||||||
nil
|
|
||||||
end
|
|
||||||
|
|
||||||
# Search for all nodes that match the given conditions, and return them
|
|
||||||
# as an array.
|
|
||||||
def find_all(conditions)
|
|
||||||
conditions = validate_conditions(conditions)
|
|
||||||
|
|
||||||
matches = []
|
|
||||||
matches << self if match(conditions)
|
|
||||||
@children.each do |child|
|
|
||||||
matches.concat child.find_all(conditions)
|
|
||||||
end
|
|
||||||
matches
|
|
||||||
end
|
|
||||||
|
|
||||||
# Returns +false+. Subclasses may override this if they define a kind of
|
|
||||||
# tag.
|
|
||||||
def tag?
|
|
||||||
false
|
|
||||||
end
|
|
||||||
|
|
||||||
def validate_conditions(conditions)
|
|
||||||
Conditions === conditions ? conditions : Conditions.new(conditions)
|
|
||||||
end
|
|
||||||
|
|
||||||
def ==(node)
|
|
||||||
return false unless self.class == node.class && children.size == node.children.size
|
|
||||||
|
|
||||||
equivalent = true
|
|
||||||
|
|
||||||
children.size.times do |i|
|
|
||||||
equivalent &&= children[i] == node.children[i]
|
|
||||||
end
|
|
||||||
|
|
||||||
equivalent
|
|
||||||
end
|
|
||||||
|
|
||||||
class <<self
|
|
||||||
def parse(parent, line, pos, content, strict=true)
|
|
||||||
if content !~ /^<\S/
|
|
||||||
Text.new(parent, line, pos, content)
|
|
||||||
else
|
|
||||||
scanner = StringScanner.new(content)
|
|
||||||
|
|
||||||
unless scanner.skip(/</)
|
|
||||||
if strict
|
|
||||||
raise "expected <"
|
|
||||||
else
|
|
||||||
return Text.new(parent, line, pos, content)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
if scanner.skip(/!\[CDATA\[/)
|
|
||||||
scanner.scan_until(/\]\]>/)
|
|
||||||
return CDATA.new(parent, line, pos, scanner.pre_match.gsub(/<!\[CDATA\[/, ''))
|
|
||||||
end
|
|
||||||
|
|
||||||
closing = ( scanner.scan(/\//) ? :close : nil )
|
|
||||||
return Text.new(parent, line, pos, content) unless name = scanner.scan(/[\w:-]+/)
|
|
||||||
name
|
|
||||||
|
|
||||||
unless closing
|
|
||||||
scanner.skip(/\s*/)
|
|
||||||
attributes = {}
|
|
||||||
while attr = scanner.scan(/[-\w:]+/)
|
|
||||||
value = true
|
|
||||||
if scanner.scan(/\s*=\s*/)
|
|
||||||
if delim = scanner.scan(/['"]/)
|
|
||||||
value = ""
|
|
||||||
while text = scanner.scan(/[^#{delim}\\]+|./)
|
|
||||||
case text
|
|
||||||
when "\\" then
|
|
||||||
value << text
|
|
||||||
value << scanner.getch
|
|
||||||
when delim
|
|
||||||
break
|
|
||||||
else value << text
|
|
||||||
end
|
|
||||||
end
|
|
||||||
else
|
|
||||||
value = scanner.scan(/[^\s>\/]+/)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
attributes[attr] = value
|
|
||||||
scanner.skip(/\s*/)
|
|
||||||
end
|
|
||||||
|
|
||||||
closing = ( scanner.scan(/\//) ? :self : nil )
|
|
||||||
end
|
|
||||||
|
|
||||||
unless scanner.scan(/\s*>/)
|
|
||||||
if strict
|
|
||||||
raise "expected > (got #{scanner.rest.inspect} for #{content}, #{attributes.inspect})"
|
|
||||||
else
|
|
||||||
# throw away all text until we find what we're looking for
|
|
||||||
scanner.skip_until(/>/) or scanner.terminate
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
Tag.new(parent, line, pos, name, attributes, closing)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# A node that represents text, rather than markup.
|
|
||||||
class Text < Node #:nodoc:
|
|
||||||
|
|
||||||
attr_reader :content
|
|
||||||
|
|
||||||
# Creates a new text node as a child of the given parent, with the given
|
|
||||||
# content.
|
|
||||||
def initialize(parent, line, pos, content)
|
|
||||||
super(parent, line, pos)
|
|
||||||
@content = content
|
|
||||||
end
|
|
||||||
|
|
||||||
# Returns the content of this node.
|
|
||||||
def to_s
|
|
||||||
@content
|
|
||||||
end
|
|
||||||
|
|
||||||
# Returns +self+ if this node meets the given conditions. Text nodes support
|
|
||||||
# conditions of the following kinds:
|
|
||||||
#
|
|
||||||
# * if +conditions+ is a string, it must be a substring of the node's
|
|
||||||
# content
|
|
||||||
# * if +conditions+ is a regular expression, it must match the node's
|
|
||||||
# content
|
|
||||||
# * if +conditions+ is a hash, it must contain a <tt>:content</tt> key that
|
|
||||||
# is either a string or a regexp, and which is interpreted as described
|
|
||||||
# above.
|
|
||||||
def find(conditions)
|
|
||||||
match(conditions) && self
|
|
||||||
end
|
|
||||||
|
|
||||||
# Returns non-+nil+ if this node meets the given conditions, or +nil+
|
|
||||||
# otherwise. See the discussion of #find for the valid conditions.
|
|
||||||
def match(conditions)
|
|
||||||
case conditions
|
|
||||||
when String
|
|
||||||
@content == conditions
|
|
||||||
when Regexp
|
|
||||||
@content =~ conditions
|
|
||||||
when Hash
|
|
||||||
conditions = validate_conditions(conditions)
|
|
||||||
|
|
||||||
# Text nodes only have :content, :parent, :ancestor
|
|
||||||
unless (conditions.keys - [:content, :parent, :ancestor]).empty?
|
|
||||||
return false
|
|
||||||
end
|
|
||||||
|
|
||||||
match(conditions[:content])
|
|
||||||
else
|
|
||||||
nil
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def ==(node)
|
|
||||||
return false unless super
|
|
||||||
content == node.content
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# A CDATA node is simply a text node with a specialized way of displaying
|
|
||||||
# itself.
|
|
||||||
class CDATA < Text #:nodoc:
|
|
||||||
def to_s
|
|
||||||
"<![CDATA[#{super}]>"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# A Tag is any node that represents markup. It may be an opening tag, a
|
|
||||||
# closing tag, or a self-closing tag. It has a name, and may have a hash of
|
|
||||||
# attributes.
|
|
||||||
class Tag < Node #:nodoc:
|
|
||||||
|
|
||||||
# Either +nil+, <tt>:close</tt>, or <tt>:self</tt>
|
|
||||||
attr_reader :closing
|
|
||||||
|
|
||||||
# Either +nil+, or a hash of attributes for this node.
|
|
||||||
attr_reader :attributes
|
|
||||||
|
|
||||||
# The name of this tag.
|
|
||||||
attr_reader :name
|
|
||||||
|
|
||||||
# Create a new node as a child of the given parent, using the given content
|
|
||||||
# to describe the node. It will be parsed and the node name, attributes and
|
|
||||||
# closing status extracted.
|
|
||||||
def initialize(parent, line, pos, name, attributes, closing)
|
|
||||||
super(parent, line, pos)
|
|
||||||
@name = name
|
|
||||||
@attributes = attributes
|
|
||||||
@closing = closing
|
|
||||||
end
|
|
||||||
|
|
||||||
# A convenience for obtaining an attribute of the node. Returns +nil+ if
|
|
||||||
# the node has no attributes.
|
|
||||||
def [](attr)
|
|
||||||
@attributes ? @attributes[attr] : nil
|
|
||||||
end
|
|
||||||
|
|
||||||
# Returns non-+nil+ if this tag can contain child nodes.
|
|
||||||
def childless?(xml = false)
|
|
||||||
return false if xml && @closing.nil?
|
|
||||||
!@closing.nil? ||
|
|
||||||
@name =~ /^(img|br|hr|link|meta|area|base|basefont|
|
|
||||||
col|frame|input|isindex|param)$/ox
|
|
||||||
end
|
|
||||||
|
|
||||||
# Returns a textual representation of the node
|
|
||||||
def to_s
|
|
||||||
if @closing == :close
|
|
||||||
"</#{@name}>"
|
|
||||||
else
|
|
||||||
s = "<#{@name}"
|
|
||||||
@attributes.each do |k,v|
|
|
||||||
s << " #{k}"
|
|
||||||
s << "=\"#{v}\"" if String === v
|
|
||||||
end
|
|
||||||
s << " /" if @closing == :self
|
|
||||||
s << ">"
|
|
||||||
@children.each { |child| s << child.to_s }
|
|
||||||
s << "</#{@name}>" if @closing != :self && !@children.empty?
|
|
||||||
s
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# If either the node or any of its children meet the given conditions, the
|
|
||||||
# matching node is returned. Otherwise, +nil+ is returned. (See the
|
|
||||||
# description of the valid conditions in the +match+ method.)
|
|
||||||
def find(conditions)
|
|
||||||
match(conditions) && self || super
|
|
||||||
end
|
|
||||||
|
|
||||||
# Returns +true+, indicating that this node represents an HTML tag.
|
|
||||||
def tag?
|
|
||||||
true
|
|
||||||
end
|
|
||||||
|
|
||||||
# Returns +true+ if the node meets any of the given conditions. The
|
|
||||||
# +conditions+ parameter must be a hash of any of the following keys
|
|
||||||
# (all are optional):
|
|
||||||
#
|
|
||||||
# * <tt>:tag</tt>: the node name must match the corresponding value
|
|
||||||
# * <tt>:attributes</tt>: a hash. The node's values must match the
|
|
||||||
# corresponding values in the hash.
|
|
||||||
# * <tt>:parent</tt>: a hash. The node's parent must match the
|
|
||||||
# corresponding hash.
|
|
||||||
# * <tt>:child</tt>: a hash. At least one of the node's immediate children
|
|
||||||
# must meet the criteria described by the hash.
|
|
||||||
# * <tt>:ancestor</tt>: a hash. At least one of the node's ancestors must
|
|
||||||
# meet the criteria described by the hash.
|
|
||||||
# * <tt>:descendant</tt>: a hash. At least one of the node's descendants
|
|
||||||
# must meet the criteria described by the hash.
|
|
||||||
# * <tt>:sibling</tt>: a hash. At least one of the node's siblings must
|
|
||||||
# meet the criteria described by the hash.
|
|
||||||
# * <tt>:after</tt>: a hash. The node must be after any sibling meeting
|
|
||||||
# the criteria described by the hash, and at least one sibling must match.
|
|
||||||
# * <tt>:before</tt>: a hash. The node must be before any sibling meeting
|
|
||||||
# the criteria described by the hash, and at least one sibling must match.
|
|
||||||
# * <tt>:children</tt>: a hash, for counting children of a node. Accepts the
|
|
||||||
# keys:
|
|
||||||
# ** <tt>:count</tt>: either a number or a range which must equal (or
|
|
||||||
# include) the number of children that match.
|
|
||||||
# ** <tt>:less_than</tt>: the number of matching children must be less than
|
|
||||||
# this number.
|
|
||||||
# ** <tt>:greater_than</tt>: the number of matching children must be
|
|
||||||
# greater than this number.
|
|
||||||
# ** <tt>:only</tt>: another hash consisting of the keys to use
|
|
||||||
# to match on the children, and only matching children will be
|
|
||||||
# counted.
|
|
||||||
#
|
|
||||||
# Conditions are matched using the following algorithm:
|
|
||||||
#
|
|
||||||
# * if the condition is a string, it must be a substring of the value.
|
|
||||||
# * if the condition is a regexp, it must match the value.
|
|
||||||
# * if the condition is a number, the value must match number.to_s.
|
|
||||||
# * if the condition is +true+, the value must not be +nil+.
|
|
||||||
# * if the condition is +false+ or +nil+, the value must be +nil+.
|
|
||||||
#
|
|
||||||
# Usage:
|
|
||||||
#
|
|
||||||
# # test if the node is a "span" tag
|
|
||||||
# node.match :tag => "span"
|
|
||||||
#
|
|
||||||
# # test if the node's parent is a "div"
|
|
||||||
# node.match :parent => { :tag => "div" }
|
|
||||||
#
|
|
||||||
# # test if any of the node's ancestors are "table" tags
|
|
||||||
# node.match :ancestor => { :tag => "table" }
|
|
||||||
#
|
|
||||||
# # test if any of the node's immediate children are "em" tags
|
|
||||||
# node.match :child => { :tag => "em" }
|
|
||||||
#
|
|
||||||
# # test if any of the node's descendants are "strong" tags
|
|
||||||
# node.match :descendant => { :tag => "strong" }
|
|
||||||
#
|
|
||||||
# # test if the node has between 2 and 4 span tags as immediate children
|
|
||||||
# node.match :children => { :count => 2..4, :only => { :tag => "span" } }
|
|
||||||
#
|
|
||||||
# # get funky: test to see if the node is a "div", has a "ul" ancestor
|
|
||||||
# # and an "li" parent (with "class" = "enum"), and whether or not it has
|
|
||||||
# # a "span" descendant that contains # text matching /hello world/:
|
|
||||||
# node.match :tag => "div",
|
|
||||||
# :ancestor => { :tag => "ul" },
|
|
||||||
# :parent => { :tag => "li",
|
|
||||||
# :attributes => { :class => "enum" } },
|
|
||||||
# :descendant => { :tag => "span",
|
|
||||||
# :child => /hello world/ }
|
|
||||||
def match(conditions)
|
|
||||||
conditions = validate_conditions(conditions)
|
|
||||||
# check content of child nodes
|
|
||||||
if conditions[:content]
|
|
||||||
if children.empty?
|
|
||||||
return false unless match_condition("", conditions[:content])
|
|
||||||
else
|
|
||||||
return false unless children.find { |child| child.match(conditions[:content]) }
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# test the name
|
|
||||||
return false unless match_condition(@name, conditions[:tag]) if conditions[:tag]
|
|
||||||
|
|
||||||
# test attributes
|
|
||||||
(conditions[:attributes] || {}).each do |key, value|
|
|
||||||
return false unless match_condition(self[key], value)
|
|
||||||
end
|
|
||||||
|
|
||||||
# test parent
|
|
||||||
return false unless parent.match(conditions[:parent]) if conditions[:parent]
|
|
||||||
|
|
||||||
# test children
|
|
||||||
return false unless children.find { |child| child.match(conditions[:child]) } if conditions[:child]
|
|
||||||
|
|
||||||
# test ancestors
|
|
||||||
if conditions[:ancestor]
|
|
||||||
return false unless catch :found do
|
|
||||||
p = self
|
|
||||||
throw :found, true if p.match(conditions[:ancestor]) while p = p.parent
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# test descendants
|
|
||||||
if conditions[:descendant]
|
|
||||||
return false unless children.find do |child|
|
|
||||||
# test the child
|
|
||||||
child.match(conditions[:descendant]) ||
|
|
||||||
# test the child's descendants
|
|
||||||
child.match(:descendant => conditions[:descendant])
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# count children
|
|
||||||
if opts = conditions[:children]
|
|
||||||
matches = children.select do |c|
|
|
||||||
(c.kind_of?(HTML::Tag) and (c.closing == :self or ! c.childless?))
|
|
||||||
end
|
|
||||||
|
|
||||||
matches = matches.select { |c| c.match(opts[:only]) } if opts[:only]
|
|
||||||
opts.each do |key, value|
|
|
||||||
next if key == :only
|
|
||||||
case key
|
|
||||||
when :count
|
|
||||||
if Integer === value
|
|
||||||
return false if matches.length != value
|
|
||||||
else
|
|
||||||
return false unless value.include?(matches.length)
|
|
||||||
end
|
|
||||||
when :less_than
|
|
||||||
return false unless matches.length < value
|
|
||||||
when :greater_than
|
|
||||||
return false unless matches.length > value
|
|
||||||
else raise "unknown count condition #{key}"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# test siblings
|
|
||||||
if conditions[:sibling] || conditions[:before] || conditions[:after]
|
|
||||||
siblings = parent ? parent.children : []
|
|
||||||
self_index = siblings.index(self)
|
|
||||||
|
|
||||||
if conditions[:sibling]
|
|
||||||
return false unless siblings.detect do |s|
|
|
||||||
s != self && s.match(conditions[:sibling])
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
if conditions[:before]
|
|
||||||
return false unless siblings[self_index+1..-1].detect do |s|
|
|
||||||
s != self && s.match(conditions[:before])
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
if conditions[:after]
|
|
||||||
return false unless siblings[0,self_index].detect do |s|
|
|
||||||
s != self && s.match(conditions[:after])
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
true
|
|
||||||
end
|
|
||||||
|
|
||||||
def ==(node)
|
|
||||||
return false unless super
|
|
||||||
return false unless closing == node.closing && self.name == node.name
|
|
||||||
attributes == node.attributes
|
|
||||||
end
|
|
||||||
|
|
||||||
private
|
|
||||||
# Match the given value to the given condition.
|
|
||||||
def match_condition(value, condition)
|
|
||||||
case condition
|
|
||||||
when String
|
|
||||||
value && value == condition
|
|
||||||
when Regexp
|
|
||||||
value && value.match(condition)
|
|
||||||
when Numeric
|
|
||||||
value == condition.to_s
|
|
||||||
when true
|
|
||||||
!value.nil?
|
|
||||||
when false, nil
|
|
||||||
value.nil?
|
|
||||||
else
|
|
||||||
false
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
215
lib/sanitize.rb
215
lib/sanitize.rb
|
@ -1,207 +1,26 @@
|
||||||
module Sanitize
|
module Sanitize
|
||||||
|
|
||||||
# This module provides sanitization of XHTML+MathML+SVG
|
# This module provides sanitization of XHTML+MathML+SVG
|
||||||
# and of inline style attributes.
|
# and of inline style attributes.
|
||||||
#
|
#
|
||||||
# Based heavily on Sam Ruby's code in the Universal FeedParser.
|
# Uses the HTML5lib parser, so that the parsing behaviour should
|
||||||
|
# resemble that of browsers.
|
||||||
require 'html/tokenizer'
|
#
|
||||||
require 'node'
|
# sanitize_xhtml() is a case-sensitive sanitizer, suitable for XHTML
|
||||||
|
# sanitize_html() is a case-insensitive sanitizer suitable for HTML
|
||||||
acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'b',
|
|
||||||
'big', 'blockquote', 'br', 'button', 'caption', 'center', 'cite',
|
|
||||||
'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt',
|
|
||||||
'em', 'fieldset', 'font', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
|
|
||||||
'hr', 'i', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'map',
|
|
||||||
'menu', 'ol', 'optgroup', 'option', 'p', 'pre', 'q', 's', 'samp',
|
|
||||||
'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'table',
|
|
||||||
'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u',
|
|
||||||
'ul', 'var']
|
|
||||||
|
|
||||||
mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
|
|
||||||
'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
|
|
||||||
'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
|
|
||||||
'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
|
|
||||||
'munderover', 'none']
|
|
||||||
|
|
||||||
svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
|
|
||||||
'animateTransform', 'circle', 'defs', 'desc', 'ellipse', 'font-face',
|
|
||||||
'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern', 'image',
|
|
||||||
'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
|
|
||||||
'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
|
|
||||||
'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
|
|
||||||
|
|
||||||
acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
|
|
||||||
'action', 'align', 'alt', 'axis', 'border', 'cellpadding',
|
|
||||||
'cellspacing', 'char', 'charoff', 'charset', 'checked', 'cite', 'class',
|
|
||||||
'clear', 'cols', 'colspan', 'color', 'compact', 'coords', 'datetime',
|
|
||||||
'dir', 'disabled', 'enctype', 'for', 'frame', 'headers', 'height',
|
|
||||||
'href', 'hreflang', 'hspace', 'id', 'ismap', 'label', 'lang',
|
|
||||||
'longdesc', 'maxlength', 'media', 'method', 'multiple', 'name',
|
|
||||||
'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 'rel', 'rev',
|
|
||||||
'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size',
|
|
||||||
'span', 'src', 'start', 'style', 'summary', 'tabindex', 'target', 'title',
|
|
||||||
'type', 'usemap', 'valign', 'value', 'vspace', 'width', 'xml:lang']
|
|
||||||
|
|
||||||
|
|
||||||
mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
|
require 'html5lib/sanitizer'
|
||||||
'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
|
require 'html5lib/html5parser'
|
||||||
'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
|
require 'html5lib/liberalxmlparser'
|
||||||
'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
|
include HTML5lib
|
||||||
'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
|
|
||||||
'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
|
|
||||||
'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
|
|
||||||
'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
|
|
||||||
'xlink:type', 'xmlns', 'xmlns:xlink']
|
|
||||||
|
|
||||||
|
def sanitize_xhtml(html)
|
||||||
svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
|
XHTMLParser.parseFragment(html, :tokenizer => HTMLSanitizer).to_s
|
||||||
'arabic-form', 'ascent', 'attributeName', 'attributeType',
|
end
|
||||||
'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
|
|
||||||
'class', 'color', 'color-rendering', 'content', 'cx', 'cy', 'd', 'dx',
|
|
||||||
'dy', 'descent', 'display', 'dur', 'end', 'fill', 'fill-rule',
|
|
||||||
'font-family', 'font-size', 'font-stretch', 'font-style', 'font-variant',
|
|
||||||
'font-weight', 'from', 'fx', 'fy', 'g1', 'g2', 'glyph-name',
|
|
||||||
'gradientUnits', 'hanging', 'height', 'horiz-adv-x', 'horiz-origin-x',
|
|
||||||
'id', 'ideographic', 'k', 'keyPoints', 'keySplines', 'keyTimes',
|
|
||||||
'lang', 'marker-end', 'marker-mid', 'marker-start', 'markerHeight',
|
|
||||||
'markerUnits', 'markerWidth', 'mathematical', 'max', 'min', 'name',
|
|
||||||
'offset', 'opacity', 'orient', 'origin', 'overline-position',
|
|
||||||
'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
|
|
||||||
'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount', 'repeatDur',
|
|
||||||
'requiredExtensions', 'requiredFeatures', 'restart', 'rotate', 'rx',
|
|
||||||
'ry', 'slope', 'stemh', 'stemv', 'stop-color', 'stop-opacity',
|
|
||||||
'strikethrough-position', 'strikethrough-thickness', 'stroke',
|
|
||||||
'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
|
|
||||||
'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
|
|
||||||
'stroke-width', 'systemLanguage', 'target',
|
|
||||||
'text-anchor', 'to', 'transform', 'type', 'u1', 'u2',
|
|
||||||
'underline-position', 'underline-thickness', 'unicode',
|
|
||||||
'unicode-range', 'units-per-em', 'values', 'version', 'viewBox',
|
|
||||||
'visibility', 'width', 'widths', 'x', 'x-height', 'x1', 'x2',
|
|
||||||
'xlink:actuate', 'xlink:arcrole', 'xlink:href', 'xlink:role',
|
|
||||||
'xlink:show', 'xlink:title', 'xlink:type', 'xml:base', 'xml:lang',
|
|
||||||
'xml:space', 'xmlns', 'xmlns:xlink', 'y', 'y1', 'y2', 'zoomAndPan']
|
|
||||||
|
|
||||||
attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'xlink:href']
|
def sanitize_html(html)
|
||||||
|
HTMLParser.parseFragment(html, :tokenizer => HTMLSanitizer).to_s
|
||||||
acceptable_css_properties = ['azimuth', 'background-color',
|
end
|
||||||
'border-bottom-color', 'border-collapse', 'border-color',
|
|
||||||
'border-left-color', 'border-right-color', 'border-top-color', 'clear',
|
|
||||||
'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
|
|
||||||
'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
|
|
||||||
'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
|
|
||||||
'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
|
|
||||||
'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
|
|
||||||
'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
|
|
||||||
'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
|
|
||||||
'white-space', 'width']
|
|
||||||
|
|
||||||
acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
|
end
|
||||||
'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
|
|
||||||
'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
|
|
||||||
'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
|
|
||||||
'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
|
|
||||||
'transparent', 'underline', 'white', 'yellow']
|
|
||||||
|
|
||||||
acceptable_svg_properties = [ 'fill', 'fill-opacity', 'fill-rule',
|
|
||||||
'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
|
|
||||||
'stroke-opacity']
|
|
||||||
|
|
||||||
acceptable_protocols = [ 'ed2k', 'ftp', 'http', 'https', 'irc',
|
|
||||||
'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
|
|
||||||
'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
|
|
||||||
'ssh', 'sftp', 'rtsp', 'afs' ]
|
|
||||||
|
|
||||||
ALLOWED_ELEMENTS = acceptable_elements + mathml_elements + svg_elements unless defined?(ALLOWED_ELEMENTS)
|
|
||||||
ALLOWED_ATTRIBUTES = acceptable_attributes + mathml_attributes + svg_attributes unless defined?(ALLOWED_ATTRIBUTES)
|
|
||||||
ALLOWED_CSS_PROPERTIES = acceptable_css_properties unless defined?(ALLOWED_CSS_PROPERTIES)
|
|
||||||
ALLOWED_CSS_KEYWORDS = acceptable_css_keywords unless defined?(ALLOWED_CSS_KEYWORDS)
|
|
||||||
ALLOWED_SVG_PROPERTIES = acceptable_svg_properties unless defined?(ALLOWED_SVG_PROPERTIES)
|
|
||||||
ALLOWED_PROTOCOLS = acceptable_protocols unless defined?(ALLOWED_PROTOCOLS)
|
|
||||||
ATTR_VAL_IS_URI = attr_val_is_uri unless defined?(ATTR_VAL_IS_URI)
|
|
||||||
|
|
||||||
# Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and stripping out all
|
|
||||||
# attributes not in ALLOWED_ATTRIBUTES. Style attributes are parsed, and a restricted set,
|
|
||||||
# specified by ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
|
|
||||||
# attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified in
|
|
||||||
# ALLOWED_PROTOCOLS are allowed.
|
|
||||||
# You can adjust what gets sanitized, by defining these constant arrays before this Module is loaded.
|
|
||||||
#
|
|
||||||
# sanitize_html('<script> do_nasty_stuff() </script>')
|
|
||||||
# => <script> do_nasty_stuff() </script>
|
|
||||||
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
|
|
||||||
# => <a>Click here for $100</a>
|
|
||||||
def sanitize_xhtml(html)
|
|
||||||
if html.index("<")
|
|
||||||
tokenizer = HTML::Tokenizer.new(html)
|
|
||||||
new_text = ""
|
|
||||||
|
|
||||||
while token = tokenizer.next
|
|
||||||
node = XHTML::Node.parse(nil, 0, 0, token, false)
|
|
||||||
new_text << case node.tag?
|
|
||||||
when true
|
|
||||||
if ALLOWED_ELEMENTS.include?(node.name)
|
|
||||||
if node.closing != :close
|
|
||||||
node.attributes.delete_if { |attr,v| !ALLOWED_ATTRIBUTES.include?(attr) }
|
|
||||||
ATTR_VAL_IS_URI.each do |attr|
|
|
||||||
val_unescaped = CGI.unescapeHTML(node.attributes[attr].to_s).gsub(/[\000-\040\177\s]+|\302*[\200-\240]/,'').downcase
|
|
||||||
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ and !ALLOWED_PROTOCOLS.include?(val_unescaped.split(':')[0])
|
|
||||||
node.attributes.delete attr
|
|
||||||
end
|
|
||||||
end
|
|
||||||
if node.attributes['style']
|
|
||||||
node.attributes['style'] = sanitize_css(node.attributes['style'])
|
|
||||||
end
|
|
||||||
end
|
|
||||||
node.to_s
|
|
||||||
else
|
|
||||||
node.to_s.gsub(/</, "<")
|
|
||||||
end
|
|
||||||
else
|
|
||||||
node.to_s.gsub(/</, "<")
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
html = new_text
|
|
||||||
end
|
|
||||||
html
|
|
||||||
end
|
|
||||||
|
|
||||||
def sanitize_css(style)
|
|
||||||
# disallow urls
|
|
||||||
style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')
|
|
||||||
|
|
||||||
# gauntlet
|
|
||||||
if style !~ /^([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*$/
|
|
||||||
style = ''
|
|
||||||
return style
|
|
||||||
end
|
|
||||||
if style !~ /^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$/
|
|
||||||
style = ''
|
|
||||||
return style
|
|
||||||
end
|
|
||||||
|
|
||||||
clean = []
|
|
||||||
style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val|
|
|
||||||
if ALLOWED_CSS_PROPERTIES.include?(prop.downcase)
|
|
||||||
clean << prop + ': ' + val + ';'
|
|
||||||
elsif ['background','border','margin','padding'].include?(prop.split('-')[0].downcase)
|
|
||||||
goodval = true
|
|
||||||
val.split().each do |keyword|
|
|
||||||
if !ALLOWED_CSS_KEYWORDS.include?(keyword) and
|
|
||||||
keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/
|
|
||||||
goodval = false
|
|
||||||
end
|
|
||||||
end
|
|
||||||
if goodval
|
|
||||||
clean << prop + ': ' + val + ';'
|
|
||||||
end
|
|
||||||
elsif ALLOWED_SVG_PROPERTIES.include?(prop.downcase)
|
|
||||||
clean << prop + ': ' + val + ';'
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
style = clean.join(' ')
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
4
vendor/plugins/HTML5lib/Rakefile.rb
vendored
4
vendor/plugins/HTML5lib/Rakefile.rb
vendored
|
@ -2,6 +2,6 @@ require 'rake'
|
||||||
require 'rake/testtask'
|
require 'rake/testtask'
|
||||||
|
|
||||||
Rake::TestTask.new do |task|
|
Rake::TestTask.new do |task|
|
||||||
task.pattern = 'tests/test_*.rb'
|
task.pattern = 'tests/test_*.rb'
|
||||||
task.verbose = true
|
task.verbose = true
|
||||||
end
|
end
|
||||||
|
|
1314
vendor/plugins/HTML5lib/lib/html5lib/constants.rb
vendored
1314
vendor/plugins/HTML5lib/lib/html5lib/constants.rb
vendored
File diff suppressed because it is too large
Load diff
2102
vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb
vendored
2102
vendor/plugins/HTML5lib/lib/html5lib/html5parser.rb
vendored
File diff suppressed because it is too large
Load diff
46
vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb
vendored
Normal file
46
vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_body_phase.rb
vendored
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Handles tokens received in the "after body" phase of tree construction.
  class AfterBodyPhase < Phase

    handle_end('html')

    # Comments are attached to the first open element (the <html> element)
    # rather than to whatever element is currently open.
    def processComment(data)
      root = @tree.openElements[0]
      @tree.insertComment(data, root)
    end

    # Reports a parse error, then reprocesses the characters in the inBody phase.
    def processCharacters(data)
      message = _('Unexpected non-space characters in the after body phase.')
      @parser.parseError(message)
      @parser.phase = @parser.phases[:inBody]
      @parser.phase.processCharacters(data)
    end

    # Reports a parse error, then reprocesses the start tag in the inBody phase.
    def processStartTag(name, attributes)
      message = _("Unexpected start tag token (#{name}) in the after body phase.")
      @parser.parseError(message)
      @parser.phase = @parser.phases[:inBody]
      @parser.phase.processStartTag(name, attributes)
    end

    # </html>: a parse error when parsing innerHTML, otherwise the parser
    # moves on to the trailingEnd phase.
    def endTagHtml(name)
      return @parser.parseError if @parser.innerHTML

      # XXX: This may need to be done, not sure
      # Don't set lastPhase to the current phase but to the inBody phase
      # instead. No need for extra parse errors if there's something after </html>.
      # Try "<!doctype html>X</html>X" for instance.
      @parser.lastPhase = @parser.phase
      @parser.phase = @parser.phases[:trailingEnd]
    end

    # Reports a parse error, then reprocesses the end tag in the inBody phase.
    def endTagOther(name)
      message = _("Unexpected end tag token (#{name}) in the after body phase.")
      @parser.parseError(message)
      @parser.phase = @parser.phases[:inBody]
      @parser.phase.processEndTag(name)
    end

  end
end
|
34
vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_frameset_phase.rb
vendored
Normal file
34
vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_frameset_phase.rb
vendored
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Handles tokens received in the "after frameset" phase of tree construction.
  class AfterFramesetPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#after3

    handle_start('html', 'noframes')

    handle_end('html')

    # Character data is a parse error here and is otherwise ignored.
    def processCharacters(data)
      message = _('Unexpected non-space characters in the after frameset phase. Ignored.')
      @parser.parseError(message)
    end

    # <noframes> is handed to the inBody phase without switching phases.
    def startTagNoframes(name, attributes)
      in_body = @parser.phases[:inBody]
      in_body.processStartTag(name, attributes)
    end

    # Any other start tag is a parse error and is ignored.
    def startTagOther(name, attributes)
      message = _("Unexpected start tag (#{name}) in the after frameset phase. Ignored.")
      @parser.parseError(message)
    end

    # </html>: remember the current phase and move to the trailingEnd phase.
    def endTagHtml(name)
      @parser.lastPhase = @parser.phase
      @parser.phase = @parser.phases[:trailingEnd]
    end

    # Any other end tag is a parse error and is ignored.
    def endTagOther(name)
      message = _("Unexpected end tag (#{name}) in the after frameset phase. Ignored.")
      @parser.parseError(message)
    end

  end
end
|
50
vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_head_phase.rb
vendored
Normal file
50
vendor/plugins/HTML5lib/lib/html5lib/html5parser/after_head_phase.rb
vendored
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Handles tokens received in the "after head" phase of tree construction.
  # Anything that is not explicitly handled inserts a <body> element, switches
  # to the inBody phase and is reprocessed there.
  class AfterHeadPhase < Phase

    handle_start('html', 'body', 'frameset', %w( base link meta script style title ) => 'FromHead')

    # End of input: insert <body>, then let the new phase see the EOF.
    def processEOF
      anythingElse
      current = @parser.phase
      current.processEOF
    end

    # Character data: insert <body>, then reprocess in the new phase.
    def processCharacters(data)
      anythingElse
      current = @parser.phase
      current.processCharacters(data)
    end

    # <body>: insert the element and switch to the inBody phase.
    def startTagBody(name, attributes)
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inBody]
    end

    # <frameset>: insert the element and switch to the inFrameset phase.
    def startTagFrameset(name, attributes)
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inFrameset]
    end

    # Head-only elements seen after the head: report a parse error, then
    # switch to the inHead phase and reprocess the tag there.
    def startTagFromHead(name, attributes)
      message = _("Unexpected start tag (#{name}) that can be in head. Moved.")
      @parser.parseError(message)
      @parser.phase = @parser.phases[:inHead]
      @parser.phase.processStartTag(name, attributes)
    end

    # Any other start tag: insert <body>, then reprocess in the new phase.
    def startTagOther(name, attributes)
      anythingElse
      current = @parser.phase
      current.processStartTag(name, attributes)
    end

    # Any end tag: insert <body>, then reprocess in the new phase.
    def processEndTag(name)
      anythingElse
      current = @parser.phase
      current.processEndTag(name)
    end

    # Inserts an attribute-less <body> element and switches to the inBody phase.
    def anythingElse
      @tree.insertElement('body', {})
      @parser.phase = @parser.phases[:inBody]
    end

  end
end
|
41
vendor/plugins/HTML5lib/lib/html5lib/html5parser/before_head_phase.rb
vendored
Normal file
41
vendor/plugins/HTML5lib/lib/html5lib/html5parser/before_head_phase.rb
vendored
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Handles tokens received in the "before head" phase of tree construction.
  # Most tokens cause a <head> element to be inserted before being
  # reprocessed by the phase that startTagHead switches to.
  class BeforeHeadPhase < Phase

    handle_start('html', 'head')

    handle_end('html')

    # End of input: insert <head>, then let the new phase see the EOF.
    def processEOF
      startTagHead('head', {})
      current = @parser.phase
      current.processEOF
    end

    # Character data: insert <head>, then reprocess in the new phase.
    def processCharacters(data)
      startTagHead('head', {})
      current = @parser.phase
      current.processCharacters(data)
    end

    # Inserts the head element, records it as the tree's head pointer and
    # switches to the inHead phase.
    def startTagHead(name, attributes)
      @tree.insertElement(name, attributes)
      inserted = @tree.openElements[-1]
      @tree.headPointer = inserted
      @parser.phase = @parser.phases[:inHead]
    end

    # Any other start tag: insert <head>, then reprocess in the new phase.
    def startTagOther(name, attributes)
      startTagHead('head', {})
      current = @parser.phase
      current.processStartTag(name, attributes)
    end

    # </html>: insert <head>, then reprocess the end tag in the new phase.
    def endTagHtml(name)
      startTagHead('head', {})
      current = @parser.phase
      current.processEndTag(name)
    end

    # Any other end tag is a parse error.
    def endTagOther(name)
      message = _("Unexpected end tag (#{name}) after the (implied) root element.")
      @parser.parseError(message)
    end

  end
end
|
548
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_body_phase.rb
vendored
Normal file
548
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_body_phase.rb
vendored
Normal file
|
@ -0,0 +1,548 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Handles tokens received in the "in body" phase of tree construction.
  #
  # The handle_start / handle_end declarations below map tag names to the
  # startTagXxx / endTagXxx methods of this class; an Array of names maps to a
  # handler named after its elements, and an <tt>Array => 'Name'</tt> pair
  # maps to startTagName / endTagName.
  class InBodyPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-body

    handle_start 'html', 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'

    handle_start 'input', 'textarea', 'select', 'isindex', %w( script style ), %w( marquee object )

    handle_start %w( li dd dt ) => 'ListItem', %w( base link meta title ) => 'FromHead'

    handle_start %w( address blockquote center dir div dl fieldset listing menu ol p pre ul ) => 'CloseP'

    handle_start %w( b big em font i nobr s small strike strong tt u ) => 'Formatting'

    handle_start %w( area basefont bgsound br embed img param spacer wbr ) => 'VoidFormatting'

    handle_start %w( iframe noembed noframes noscript ) => 'Cdata', HEADING_ELEMENTS => 'Heading'

    handle_start %w( caption col colgroup frame frameset head option optgroup tbody td tfoot th thead tr ) => 'Misplaced'

    handle_start %w( event-source section nav article aside header footer datagrid command ) => 'New'

    handle_end 'p', 'body', 'html', 'form', %w( button marquee object ), %w( dd dt li ) => 'ListItem'

    handle_end %w( address blockquote center div dl fieldset listing menu ol pre ul ) => 'Block'

    handle_end %w( a b big em font i nobr s small strike strong tt u ) => 'Formatting'

    handle_end %w( head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th ) => 'Misplaced'

    handle_end %w( area basefont bgsound br embed hr image img input isindex param spacer wbr frame ) => 'None'

    handle_end %w( noframes noscript noembed textarea xmp iframe ) => 'CdataTextAreaXmp'

    handle_end %w( event-source section nav article aside header footer datagrid command ) => 'New'

    def initialize(parser, tree)
      super(parser, tree)

      # for special handling of whitespace in <pre>
      @processSpaceCharactersPre = false
    end

    # Inserts space characters, dropping one leading newline when the current
    # node is an empty <pre>.  Resets the <pre> whitespace flag.
    def processSpaceCharactersPre(data)
      #Sometimes (start of <pre> blocks) we want to drop leading newlines
      @processSpaceCharactersPre = false
      if (data.length > 0 and data[0] == ?\n and
          @tree.openElements[-1].name == 'pre' and
          not @tree.openElements[-1].hasContent)
        data = data[1..-1]
      end
      @tree.insertText(data) if data.length > 0
    end

    # Uses the <pre>-aware variant right after a <pre> start tag, otherwise
    # the superclass behaviour.
    def processSpaceCharacters(data)
      if @processSpaceCharactersPre
        processSpaceCharactersPre(data)
      else
        super(data)
      end
    end

    def processCharacters(data)
      # XXX The specification says to do this for every character at the
      # moment, but apparently that doesn't match the real world so we don't
      # do it for space characters.
      @tree.reconstructActiveFormattingElements
      @tree.insertText(data)
    end

    # <script> and <style> are handled by the inHead phase.
    def startTagScriptStyle(name, attributes)
      @parser.phases[:inHead].processStartTag(name, attributes)
    end

    # Head-only elements: parse error, then handled by the inHead phase.
    def startTagFromHead(name, attributes)
      @parser.parseError(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
      @parser.phases[:inHead].processStartTag(name, attributes)
    end

    # A second <body>: parse error; attributes not already present on the
    # existing body element (second open element) are copied onto it.
    def startTagBody(name, attributes)
      @parser.parseError(_('Unexpected start tag (body).'))

      if (@tree.openElements.length == 1 or
          @tree.openElements[1].name != 'body')
        assert @parser.innerHTML
      else
        attributes.each do |attr, value|
          unless @tree.openElements[1].attributes.has_key?(attr)
            @tree.openElements[1].attributes[attr] = value
          end
        end
      end
    end

    # Block-level start tags: close an open <p> first.  A <pre> start tag
    # also arms the leading-newline handling.
    def startTagCloseP(name, attributes)
      endTagP('p') if in_scope?('p')
      @tree.insertElement(name, attributes)
      @processSpaceCharactersPre = true if name == 'pre'
    end

    # <form>: ignored with a parse error when a form pointer is already set.
    def startTagForm(name, attributes)
      if @tree.formPointer
        @parser.parseError(_('Unexpected start tag (form). Ignored.'))
      else
        endTagP('p') if in_scope?('p')
        @tree.insertElement(name, attributes)
        @tree.formPointer = @tree.openElements[-1]
      end
    end

    # <li>, <dd>, <dt>: close an open <p>, pop open elements up to and
    # including a matching open list item, then insert the new element.
    def startTagListItem(name, attributes)
      endTagP('p') if in_scope?('p')
      stopNames = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}
      stopName = stopNames[name]

      @tree.openElements.reverse.each_with_index do |node, i|
        if stopName.include?(node.name)
          (i + 1).times { @tree.openElements.pop }
          break
        end

        # Phrasing elements are all non special, non scoping, non
        # formatting elements
        break if ((SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) and
                  not ['address', 'div'].include?(node.name))
      end

      # Always insert an <li> element.
      @tree.insertElement(name, attributes)
    end

    def startTagPlaintext(name, attributes)
      endTagP('p') if in_scope?('p')
      @tree.insertElement(name, attributes)
      @parser.tokenizer.contentModelFlag = :PLAINTEXT
    end

    # Heading start tag: close an open <p>; if a heading is already in scope
    # report a parse error and pop open elements up to that heading.
    def startTagHeading(name, attributes)
      endTagP('p') if in_scope?('p')
      HEADING_ELEMENTS.each do |element|
        if in_scope?(element)
          @parser.parseError(_("Unexpected start tag (#{name})."))

          remove_open_elements_until { |open_element| HEADING_ELEMENTS.include?(open_element.name) }

          break
        end
      end
      @tree.insertElement(name, attributes)
    end

    # <a>: an <a> already in the active formatting elements is closed first.
    def startTagA(name, attributes)
      if afeAElement = @tree.elementInActiveFormattingElements('a')
        @parser.parseError(_('Unexpected start tag (a) implies end tag (a).'))
        endTagFormatting('a')
        @tree.openElements.delete(afeAElement) if @tree.openElements.include?(afeAElement)
        @tree.activeFormattingElements.delete(afeAElement) if @tree.activeFormattingElements.include?(afeAElement)
      end
      @tree.reconstructActiveFormattingElements
      addFormattingElement(name, attributes)
    end

    def startTagFormatting(name, attributes)
      @tree.reconstructActiveFormattingElements
      addFormattingElement(name, attributes)
    end

    # <button>: a button already in scope is closed first and the start tag
    # is reprocessed; otherwise the element is inserted followed by a Marker.
    def startTagButton(name, attributes)
      if in_scope?('button')
        @parser.parseError(_('Unexpected start tag (button) implied end tag (button).'))
        processEndTag('button')
        @parser.phase.processStartTag(name, attributes)
      else
        @tree.reconstructActiveFormattingElements
        @tree.insertElement(name, attributes)
        @tree.activeFormattingElements.push(Marker)
      end
    end

    def startTagMarqueeObject(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insertElement(name, attributes)
      @tree.activeFormattingElements.push(Marker)
    end

    def startTagXmp(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insertElement(name, attributes)
      @parser.tokenizer.contentModelFlag = :CDATA
    end

    def startTagTable(name, attributes)
      processEndTag('p') if in_scope?('p')
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inTable]
    end

    # Void elements: inserted and immediately popped from the open elements.
    def startTagVoidFormatting(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insertElement(name, attributes)
      @tree.openElements.pop
    end

    def startTagHr(name, attributes)
      endTagP('p') if in_scope?('p')
      @tree.insertElement(name, attributes)
      @tree.openElements.pop
    end

    def startTagImage(name, attributes)
      # No really...
      @parser.parseError(_('Unexpected start tag (image). Treated as img.'))
      processStartTag('img', attributes)
    end

    def startTagInput(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insertElement(name, attributes)
      if @tree.formPointer
        # XXX Not exactly sure what to do here
        # @tree.openElements[-1].form = @tree.formPointer
      end
      @tree.openElements.pop
    end

    # <isindex>: expanded into a form containing a labelled input.  Ignored
    # (after the parse error) when a form pointer is already set.  Note that
    # the passed attributes hash is modified ('name' is set to 'isindex').
    def startTagIsindex(name, attributes)
      @parser.parseError(_("Unexpected start tag isindex. Don't use it!"))
      return if @tree.formPointer
      processStartTag('form', {})
      processStartTag('hr', {})
      processStartTag('p', {})
      processStartTag('label', {})
      # XXX Localization ...
      processCharacters('This is a searchable index. Insert your search keywords here:')
      attributes['name'] = 'isindex'
      processStartTag('input', attributes)
      processEndTag('label')
      processEndTag('p')
      processStartTag('hr', {})
      processEndTag('form')
    end

    def startTagTextarea(name, attributes)
      # XXX Form element pointer checking here as well...
      @tree.insertElement(name, attributes)
      @parser.tokenizer.contentModelFlag = :RCDATA
    end

    # iframe, noembed noframes, noscript(if scripting enabled)
    def startTagCdata(name, attributes)
      @tree.insertElement(name, attributes)
      @parser.tokenizer.contentModelFlag = :CDATA
    end

    def startTagSelect(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inSelect]
    end

    def startTagMisplaced(name, attributes)
      # Elements that should be children of other elements that have a
      # different insertion mode; here they are ignored
      # "caption", "col", "colgroup", "frame", "frameset", "head",
      # "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
      # "tr", "noscript"
      @parser.parseError(_("Unexpected start tag (#{name}). Ignored."))
    end

    def startTagNew(name, attributes)
      # New HTML5 elements, "event-source", "section", "nav",
      # "article", "aside", "header", "footer", "datagrid", "command"
      # (STDERR.puts, as in endTagNew: there is no `sys` object in Ruby.)
      STDERR.puts "Warning: Undefined behaviour for start tag #{name}"
      startTagOther(name, attributes)
      #raise NotImplementedError
    end

    def startTagOther(name, attributes)
      @tree.reconstructActiveFormattingElements
      @tree.insertElement(name, attributes)
    end

    # </p>: generate implied end tags, report a parse error if the current
    # node is not a <p>, then pop until no <p> is in scope.
    def endTagP(name)
      @tree.generateImpliedEndTags('p') if in_scope?('p')
      @parser.parseError(_('Unexpected end tag (p).')) unless @tree.openElements[-1].name == 'p'
      @tree.openElements.pop while in_scope?('p')
    end

    def endTagBody(name)
      # XXX Need to take open <p> tags into account here. We shouldn't imply
      # </p> but we should not throw a parse error either. Specification is
      # likely to be updated.
      # Same test as startTagBody: guard against a stack that holds only one
      # element before looking at the second one.
      if @tree.openElements.length < 2 or @tree.openElements[1].name != 'body'
        # innerHTML case
        @parser.parseError
        return
      end
      unless @tree.openElements[-1].name == 'body'
        @parser.parseError(_("Unexpected end tag (body). Missing end tag (#{@tree.openElements[-1].name})."))
      end
      @parser.phase = @parser.phases[:afterBody]
    end

    # </html>: act as if </body> had been seen, then reprocess the tag.
    def endTagHtml(name)
      endTagBody(name)
      @parser.phase.processEndTag(name) unless @parser.innerHTML
    end

    def endTagBlock(name)
      #Put us back in the right whitespace handling mode
      @processSpaceCharactersPre = false if name == 'pre'

      @tree.generateImpliedEndTags if in_scope?(name)

      unless @tree.openElements[-1].name == name
        @parser.parseError(_("End tag (#{name}) seen too early. Expected other end tag."))
      end

      if in_scope?(name)
        remove_open_elements_until(name)
      end
    end

    def endTagForm(name)
      endTagBlock(name)
      @tree.formPointer = nil
    end

    def endTagListItem(name)
      # AT Could merge this with the Block case
      if in_scope?(name)
        @tree.generateImpliedEndTags(name)

        unless @tree.openElements[-1].name == name
          @parser.parseError(_("End tag (#{name}) seen too early. Expected other end tag."))
        end
      end

      remove_open_elements_until(name) if in_scope?(name)
    end

    # Heading end tag: any heading in scope is closed, with a parse error
    # when the current node is not the named heading.
    def endTagHeading(name)
      HEADING_ELEMENTS.each do |element|
        if in_scope?(element)
          @tree.generateImpliedEndTags
          break
        end
      end

      unless @tree.openElements[-1].name == name
        @parser.parseError(_("Unexpected end tag (#{name}). Expected other end tag."))
      end

      HEADING_ELEMENTS.each do |element|
        if in_scope?(element)
          remove_open_elements_until { |open_element| HEADING_ELEMENTS.include?(open_element.name) }
          break
        end
      end
    end

    # The much-feared adoption agency algorithm
    def endTagFormatting(name)
      # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
      # XXX Better parseError messages appreciated.
      while true
        # Step 1 paragraph 1
        afeElement = @tree.elementInActiveFormattingElements(name)
        if not afeElement or (@tree.openElements.include?(afeElement) and not in_scope?(afeElement.name))
          @parser.parseError(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm."))
          return
        # Step 1 paragraph 2
        elsif not @tree.openElements.include?(afeElement)
          @parser.parseError(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm."))
          @tree.activeFormattingElements.delete(afeElement)
          return
        end

        # Step 1 paragraph 3
        if afeElement != @tree.openElements[-1]
          @parser.parseError(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm."))
        end

        # Step 2
        # Start of the adoption agency algorithm proper
        afeIndex = @tree.openElements.index(afeElement)
        furthestBlock = nil
        @tree.openElements[afeIndex..-1].each do |element|
          if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(element.name)
            furthestBlock = element
            break
          end
        end

        # Step 3
        if furthestBlock.nil?
          removed = remove_open_elements_until { |candidate| candidate == afeElement }
          @tree.activeFormattingElements.delete(removed)
          return
        end
        commonAncestor = @tree.openElements[afeIndex - 1]

        # Step 5
        furthestBlock.parent.removeChild(furthestBlock) if furthestBlock.parent

        # Step 6
        # The bookmark is supposed to help us identify where to reinsert
        # nodes in step 12. We have to ensure that we reinsert nodes after
        # the node before the active formatting element. Note the bookmark
        # can move in step 7.4
        bookmark = @tree.activeFormattingElements.index(afeElement)

        # Step 7
        lastNode = node = furthestBlock
        while true
          # AT replace this with a function and recursion?
          # Node is element before node in open elements
          node = @tree.openElements[@tree.openElements.index(node) - 1]
          until @tree.activeFormattingElements.include?(node)
            tmpNode = node
            node = @tree.openElements[@tree.openElements.index(node) - 1]
            @tree.openElements.delete(tmpNode)
          end
          # Step 7.3
          break if node == afeElement
          # Step 7.4
          if lastNode == furthestBlock
            # XXX should this be index(node) or index(node)+1
            # Anne: I think +1 is ok. Given x = [2,3,4,5]
            # x.index(3) gives 1 and then x[1 +1] gives 4...
            bookmark = @tree.activeFormattingElements.index(node) + 1
          end
          # Step 7.5
          if node.hasContent
            clone = node.cloneNode
            # Replace node with clone
            @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = clone
            @tree.openElements[@tree.openElements.index(node)] = clone
            node = clone
          end
          # Step 7.6
          # Remove lastNode from its parents, if any
          lastNode.parent.removeChild(lastNode) if lastNode.parent
          node.appendChild(lastNode)
          # Step 7.7
          lastNode = node
          # End of inner loop
        end

        # Step 8
        lastNode.parent.removeChild(lastNode) if lastNode.parent
        commonAncestor.appendChild(lastNode)

        # Step 9
        clone = afeElement.cloneNode

        # Step 10
        furthestBlock.reparentChildren(clone)

        # Step 11
        furthestBlock.appendChild(clone)

        # Step 12
        @tree.activeFormattingElements.delete(afeElement)
        @tree.activeFormattingElements.insert([bookmark,@tree.activeFormattingElements.length].min, clone)

        # Step 13
        @tree.openElements.delete(afeElement)
        @tree.openElements.insert(@tree.openElements.index(furthestBlock) + 1, clone)
      end
    end

    def endTagButtonMarqueeObject(name)
      @tree.generateImpliedEndTags if in_scope?(name)

      unless @tree.openElements[-1].name == name
        @parser.parseError(_("Unexpected end tag (#{name}). Expected other end tag first."))
      end

      if in_scope?(name)
        remove_open_elements_until(name)

        @tree.clearActiveFormattingElements
      end
    end

    def endTagMisplaced(name)
      # This handles elements with end tags in other insertion modes.
      @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
    end

    def endTagNone(name)
      # This handles elements with no end tag.
      @parser.parseError(_("This tag (#{name}) has no end tag"))
    end

    def endTagCdataTextAreaXmp(name)
      if @tree.openElements[-1].name == name
        @tree.openElements.pop
      else
        @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
      end
    end

    def endTagNew(name)
      # New HTML5 elements, "event-source", "section", "nav",
      # "article", "aside", "header", "footer", "datagrid", "command"
      STDERR.puts "Warning: Undefined behaviour for end tag #{name}"
      endTagOther(name)
      #raise NotImplementedError
    end

    # Any other end tag: walk the open elements from the current node down;
    # close up to the first element with a matching name, or give up with a
    # parse error at the first special/scoping element.
    def endTagOther(name)
      # XXX This logic should be moved into the treebuilder
      @tree.openElements.reverse.each do |node|
        if node.name == name
          @tree.generateImpliedEndTags

          unless @tree.openElements[-1].name == name
            @parser.parseError(_("Unexpected end tag (#{name})."))
          end

          remove_open_elements_until { |element| element == node }

          break
        else
          if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
            @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
            break
          end
        end
      end
    end

    protected

    # Inserts the element and records it in the active formatting elements.
    def addFormattingElement(name, attributes)
      @tree.insertElement(name, attributes)
      @tree.activeFormattingElements.push(@tree.openElements[-1])
    end

  end
end
|
68
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_caption_phase.rb
vendored
Normal file
68
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_caption_phase.rb
vendored
Normal file
|
@ -0,0 +1,68 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Handles tokens received in the "in caption" phase of tree construction.
  class InCaptionPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-caption

    handle_start('html', %w( caption col colgroup tbody td tfoot th thead tr ) => 'TableElement')

    handle_end('caption', 'table', %w( body col colgroup html tbody td tfoot th thead tr ) => 'Ignore')

    # True when no caption element is in (table) scope, i.e. when a caption
    # end tag would be ignored.
    def ignoreEndTagCaption
      !in_scope?('caption', true)
    end

    # Character data is handled by the inBody phase.
    def processCharacters(data)
      in_body = @parser.phases[:inBody]
      in_body.processCharacters(data)
    end

    # Table-structure start tags imply </caption>; the tag is reprocessed
    # unless that implied end tag was ignored.
    def startTagTableElement(name, attributes)
      @parser.parseError
      #XXX Have to duplicate logic here to find out if the tag is ignored
      caption_ignored = ignoreEndTagCaption
      @parser.phase.processEndTag('caption')
      @parser.phase.processStartTag(name, attributes) unless caption_ignored
    end

    # Any other start tag is handled by the inBody phase.
    def startTagOther(name, attributes)
      in_body = @parser.phases[:inBody]
      in_body.processStartTag(name, attributes)
    end

    # </caption>: close the caption and return to the inTable phase.
    def endTagCaption(name)
      if ignoreEndTagCaption
        # innerHTML case
        assert @parser.innerHTML
        return @parser.parseError
      end

      # AT this code is quite similar to endTagTable in "InTable"
      @tree.generateImpliedEndTags

      if @tree.openElements[-1].name != 'caption'
        @parser.parseError(_("Unexpected end tag (caption). Missing end tags."))
      end

      remove_open_elements_until('caption')

      @tree.clearActiveFormattingElements
      @parser.phase = @parser.phases[:inTable]
    end

    # </table> implies </caption>; the tag is reprocessed unless that implied
    # end tag was ignored.
    def endTagTable(name)
      @parser.parseError
      caption_ignored = ignoreEndTagCaption
      @parser.phase.processEndTag('caption')
      @parser.phase.processEndTag(name) unless caption_ignored
    end

    # End tags that are a parse error here and are ignored.
    def endTagIgnore(name)
      message = _("Unexpected end tag (#{name}). Ignored.")
      @parser.parseError(message)
    end

    # Any other end tag is handled by the inBody phase.
    def endTagOther(name)
      in_body = @parser.phases[:inBody]
      in_body.processEndTag(name)
    end

  end
end
|
78
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_cell_phase.rb
vendored
Normal file
78
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_cell_phase.rb
vendored
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Phase for the "in cell" insertion mode (see the spec link below).
  class InCellPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-cell

    handle_start 'html', %w[caption col colgroup tbody td tfoot th thead tr] => 'TableOther'

    handle_end %w[td th] => 'TableCell', %w[body caption col colgroup html] => 'Ignore'

    handle_end %w[table tbody tfoot thead tr] => 'Imply'

    # Character data is handed to the :inBody phase.
    def processCharacters(data)
      in_body = @parser.phases[:inBody]
      in_body.processCharacters(data)
    end

    # Start tags mapped to 'TableOther' above: if a td or th is in table
    # scope the cell is closed and the tag is passed to the phase that is
    # current afterwards; otherwise only a parse error is reported.
    def startTagTableOther(name, attributes)
      unless in_scope?('td', true) || in_scope?('th', true)
        # innerHTML case
        return @parser.parseError
      end

      closeCell
      @parser.phase.processStartTag(name, attributes)
    end

    # Any other start tag is handed to the :inBody phase.
    def startTagOther(name, attributes)
      in_body = @parser.phases[:inBody]
      in_body.processStartTag(name, attributes)
    end

    # </td> and </th>: when the named cell is in table scope, generates
    # implied end tags, pops the cell off the stack of open elements (with a
    # parse error if it was not the current node), clears the active
    # formatting elements and switches to the :inRow phase. Otherwise the
    # tag is reported and ignored.
    def endTagTableCell(name)
      unless in_scope?(name, true)
        return @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
      end

      @tree.generateImpliedEndTags(name)
      if @tree.openElements[-1].name == name
        @tree.openElements.pop
      else
        @parser.parseError("Got table cell end tag (#{name}) while required end tags are missing.")

        remove_open_elements_until(name)
      end
      @tree.clearActiveFormattingElements
      @parser.phase = @parser.phases[:inRow]
    end

    # End tags mapped to 'Ignore' above: parse error only.
    def endTagIgnore(name)
      message = _("Unexpected end tag (#{name}). Ignored.")
      @parser.parseError(message)
    end

    # End tags mapped to 'Imply' above: if the named element is in table
    # scope the cell is closed and the end tag is passed to the phase that is
    # current afterwards; otherwise only a parse error is reported.
    def endTagImply(name)
      unless in_scope?(name, true)
        # sometimes innerHTML case
        return @parser.parseError
      end

      closeCell
      @parser.phase.processEndTag(name)
    end

    # Any other end tag is handed to the :inBody phase.
    def endTagOther(name)
      in_body = @parser.phases[:inBody]
      in_body.processEndTag(name)
    end

    protected

    # Closes whichever of td / th is in table scope (td is checked first) by
    # running endTagTableCell for it; does nothing when neither is in scope.
    def closeCell
      open_cell = %w[td th].find { |cell| in_scope?(cell, true) }
      endTagTableCell(open_cell) if open_cell
    end

  end
end
|
55
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_column_group_phase.rb
vendored
Normal file
55
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_column_group_phase.rb
vendored
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Phase for the "in column group" insertion mode (see the spec link below).
  class InColumnGroupPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-column

    handle_start 'html', 'col'

    handle_end 'colgroup', 'col'

    # True when the current node is the html element, in which case a
    # colgroup end tag is ignored (see endTagColgroup).
    def ignoreEndTagColgroup
      current_node = @tree.openElements[-1]
      current_node.name == 'html'
    end

    # Character data implies </colgroup>; the data is then passed to the
    # phase that is current afterwards unless that end tag was ignored.
    def processCharacters(data)
      close_colgroup_then { |phase| phase.processCharacters(data) }
    end

    # <col>: inserted and immediately popped off the stack of open elements.
    def startTagCol(name, attributes)
      @tree.insertElement(name, attributes)
      @tree.openElements.pop
    end

    # Any other start tag implies </colgroup> and is then reprocessed unless
    # that end tag was ignored.
    def startTagOther(name, attributes)
      close_colgroup_then { |phase| phase.processStartTag(name, attributes) }
    end

    # </colgroup>: ignored with a parse error in the innerHTML case,
    # otherwise pops the current node and switches to the :inTable phase.
    def endTagColgroup(name)
      if ignoreEndTagColgroup
        # innerHTML case
        assert @parser.innerHTML
        return @parser.parseError
      end

      @tree.openElements.pop
      @parser.phase = @parser.phases[:inTable]
    end

    # </col> is always a parse error.
    def endTagCol(name)
      message = _('Unexpected end tag (col). col has no end tag.')
      @parser.parseError(message)
    end

    # Any other end tag implies </colgroup> and is then reprocessed unless
    # that end tag was ignored.
    def endTagOther(name)
      close_colgroup_then { |phase| phase.processEndTag(name) }
    end

    private

    # Acts as if a </colgroup> end tag had been seen and, unless that end tag
    # was ignored, yields the parser's (new) current phase to the block.
    # The ignore check has to run before the end tag is processed.
    def close_colgroup_then
      colgroup_end_ignored = ignoreEndTagColgroup
      endTagColgroup('colgroup')
      yield @parser.phase unless colgroup_end_ignored
    end

  end
end
|
57
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_frameset_phase.rb
vendored
Normal file
57
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_frameset_phase.rb
vendored
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Phase for the "in frameset" insertion mode (see the spec link below).
  class InFramesetPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset

    handle_start 'html', 'frameset', 'frame', 'noframes'

    handle_end 'frameset', 'noframes'

    # Character data is a parse error here and is dropped.
    def processCharacters(data)
      message = _('Unexpected characters in the frameset phase. Characters ignored.')
      @parser.parseError(message)
    end

    # <frameset>: inserted and left open.
    def startTagFrameset(name, attributes)
      @tree.insertElement(name, attributes)
    end

    # <frame>: inserted and immediately popped off the stack of open elements.
    def startTagFrame(name, attributes)
      @tree.insertElement(name, attributes)
      @tree.openElements.pop
    end

    # <noframes> is handed to the :inBody phase.
    def startTagNoframes(name, attributes)
      in_body = @parser.phases[:inBody]
      in_body.processStartTag(name, attributes)
    end

    # Any other start tag is a parse error and is ignored.
    def startTagOther(name, attributes)
      message = _("Unexpected start tag token (#{name}) in the frameset phase. Ignored")
      @parser.parseError(message)
    end

    # </frameset>: pops the current node unless it is the html element (the
    # innerHTML case, which is reported instead).
    def endTagFrameset(name)
      if @tree.openElements[-1].name != 'html'
        @tree.openElements.pop
      else
        # innerHTML case
        @parser.parseError(_("Unexpected end tag token (frameset) in the frameset phase (innerHTML)."))
      end

      # If we're not in innerHTML mode and the current node is not a
      # "frameset" element (anymore) then switch.
      return if @parser.innerHTML
      return if @tree.openElements[-1].name == 'frameset'
      @parser.phase = @parser.phases[:afterFrameset]
    end

    # </noframes> is handed to the :inBody phase.
    def endTagNoframes(name)
      in_body = @parser.phases[:inBody]
      in_body.processEndTag(name)
    end

    # Any other end tag is a parse error and is ignored.
    def endTagOther(name)
      message = _("Unexpected end tag token (#{name}) in the frameset phase. Ignored.")
      @parser.parseError(message)
    end

  end
end
|
120
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_head_phase.rb
vendored
Normal file
120
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_head_phase.rb
vendored
Normal file
|
@ -0,0 +1,120 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Phase that handles tokens while the parser is in its :inHead phase.
  class InHeadPhase < Phase

    handle_start 'html', 'head', 'title', 'style', 'script', %w[base link meta]

    handle_end 'head', 'html', %w[title style script]

    # EOF: an open title/style/script is reported and popped, then the head
    # is left via anythingElse and the EOF is passed to the new phase.
    def processEOF
      name = @tree.openElements[-1].name
      if text_container?(name)
        @parser.parseError(_("Unexpected end of file. Expected end tag (#{name})."))
        @tree.openElements.pop
      end
      anythingElse
      @parser.phase.processEOF
    end

    # Character data is inserted as text while title/style/script is the
    # current node; otherwise the head is left and the data is reprocessed.
    def processCharacters(data)
      return @tree.insertText(data) if text_container?(@tree.openElements[-1].name)

      anythingElse
      @parser.phase.processCharacters(data)
    end

    # A second <head> is a parse error and is ignored.
    def startTagHead(name, attributes)
      message = _('Unexpected start tag head in existing head. Ignored')
      @parser.parseError(message)
    end

    # <title>: appended to the head, pushed onto the stack of open elements,
    # and the tokenizer is switched to :RCDATA.
    def startTagTitle(name, attributes)
      title = @tree.createElement(name, attributes)
      appendToHead(title)
      @tree.openElements.push(title)
      @parser.tokenizer.contentModelFlag = :RCDATA
    end

    # <style>: created and opened as a :CDATA element.
    def startTagStyle(name, attributes)
      open_cdata_element(@tree.createElement(name, attributes))
    end

    # <script>: like <style>, but the element is flagged "parser-inserted"
    # before it is attached.
    def startTagScript(name, attributes)
      #XXX Inner HTML case may be wrong
      script = @tree.createElement(name, attributes)
      script._flags.push("parser-inserted")
      open_cdata_element(script)
    end

    # <base>, <link>, <meta>: created and appended to the head; not pushed
    # onto the stack of open elements.
    def startTagBaseLinkMeta(name, attributes)
      appendToHead(@tree.createElement(name, attributes))
    end

    # Any other start tag leaves the head and is reprocessed.
    def startTagOther(name, attributes)
      anythingElse
      @parser.phase.processStartTag(name, attributes)
    end

    # </head>: pops the head element if it is the current node (parse error
    # otherwise) and always switches to the :afterHead phase.
    def endTagHead(name)
      if @tree.openElements[-1].name != 'head'
        @parser.parseError(_("Unexpected end tag (head). Ignored."))
      else
        @tree.openElements.pop
      end
      @parser.phase = @parser.phases[:afterHead]
    end

    # </html> leaves the head and is reprocessed.
    def endTagHtml(name)
      anythingElse
      @parser.phase.processEndTag(name)
    end

    # </title>, </style>, </script>: pops the current node when it matches,
    # otherwise reports a parse error.
    def endTagTitleStyleScript(name)
      return @tree.openElements.pop if @tree.openElements[-1].name == name

      @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
    end

    # Any other end tag is a parse error and is ignored.
    def endTagOther(name)
      message = _("Unexpected end tag (#{name}). Ignored.")
      @parser.parseError(message)
    end

    # Leaves the head: runs endTagHead when head is the current node,
    # otherwise just switches to the :afterHead phase.
    def anythingElse
      return endTagHead('head') if @tree.openElements[-1].name == 'head'

      @parser.phase = @parser.phases[:afterHead]
    end

    protected

    # Appends element to the tree's head pointer, or to the current node when
    # there is no head pointer (asserted to be the innerHTML case).
    def appendToHead(element)
      head = @tree.headPointer
      return head.appendChild(element) unless head.nil?

      assert @parser.innerHTML
      @tree.openElements[-1].appendChild(element)
    end

    # True for the element names whose character content this phase inserts
    # as text.
    def text_container?(tag_name)
      ['title', 'style', 'script'].include?(tag_name)
    end

    # Shared tail of the <style>/<script> handlers: attaches element to the
    # head when there is a head pointer and the parser is still in the
    # :inHead phase (to the current node otherwise), pushes it onto the stack
    # of open elements and switches the tokenizer to :CDATA.
    def open_cdata_element(element)
      if !@tree.headPointer.nil? && @parser.phase == @parser.phases[:inHead]
        appendToHead(element)
      else
        @tree.openElements[-1].appendChild(element)
      end
      @tree.openElements.push(element)
      @parser.tokenizer.contentModelFlag = :CDATA
    end

  end
end
|
87
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_row_phase.rb
vendored
Normal file
87
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_row_phase.rb
vendored
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Phase for the "in row" insertion mode (see the spec link below).
  class InRowPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-row

    handle_start 'html', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead tr ) => 'TableOther'

    handle_end 'tr', 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th ) => 'Ignore'

    # Character data is handed to the :inTable phase.
    def processCharacters(data)
      @parser.phases[:inTable].processCharacters(data)
    end

    # <td>/<th>: clears the stack back to the row, inserts the cell, switches
    # to the :inCell phase and pushes a Marker onto the active formatting
    # elements.
    def startTagTableCell(name, attributes)
      clearStackToTableRowContext
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inCell]
      @tree.activeFormattingElements.push(Marker)
    end

    # Start tags mapped to 'TableOther' above imply </tr>; the tag is then
    # reprocessed by the new current phase unless that end tag was ignored.
    def startTagTableOther(name, attributes)
      # Must be evaluated before the tr end tag is processed.
      ignoreEndTag = ignoreEndTagTr
      endTagTr('tr')
      # XXX how are we sure it's always ignored in the innerHTML case?
      @parser.phase.processStartTag(name, attributes) unless ignoreEndTag
    end

    # Any other start tag is handed to the :inTable phase.
    def startTagOther(name, attributes)
      @parser.phases[:inTable].processStartTag(name, attributes)
    end

    # </tr>: ignored with a parse error when no tr is in table scope
    # (innerHTML case); otherwise clears the stack back to the row, pops it
    # and switches to the :inTableBody phase.
    def endTagTr(name)
      if ignoreEndTagTr
        # innerHTML case
        assert @parser.innerHTML
        @parser.parseError
      else
        clearStackToTableRowContext
        @tree.openElements.pop
        @parser.phase = @parser.phases[:inTableBody]
      end
    end

    # </table> implies </tr>.
    def endTagTable(name)
      # Must be evaluated before the tr end tag is processed.
      ignoreEndTag = ignoreEndTagTr
      endTagTr('tr')
      # Reprocess the current tag if the tr end tag was not ignored
      # XXX how are we sure it's always ignored in the innerHTML case?
      @parser.phase.processEndTag(name) unless ignoreEndTag
    end

    # </tbody>, </tfoot>, </thead>: when the named element is in table scope
    # the row is closed and the end tag is reprocessed; otherwise only a
    # parse error is reported.
    def endTagTableRowGroup(name)
      if in_scope?(name, true)
        endTagTr('tr')
        @parser.phase.processEndTag(name)
      else
        # innerHTML case
        @parser.parseError
      end
    end

    # End tags mapped to 'Ignore' above: parse error only.
    def endTagIgnore(name)
      @parser.parseError(_("Unexpected end tag (#{name}) in the row phase. Ignored."))
    end

    # Any other end tag is handed to the :inTable phase.
    def endTagOther(name)
      @parser.phases[:inTable].processEndTag(name)
    end

    protected

    # XXX unify this with other table helper methods
    # Pops open elements (reporting each as a parse error) until the current
    # node is tr or html.
    def clearStackToTableRowContext
      until ['tr', 'html'].include?(name = @tree.openElements[-1].name)
        @parser.parseError(_("Unexpected implied end tag (#{name}) in the row phase."))
        @tree.openElements.pop
      end
    end

    # True when no tr element is in table scope, i.e. a </tr> end tag has to
    # be ignored. The table-variant flag is passed as the positional boolean
    # used by every other in_scope? call in these phases (it used to be
    # passed as a one-entry Hash, which only worked because a Hash is truthy).
    def ignoreEndTagTr
      !in_scope?('tr', true)
    end

  end
end
|
84
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_select_phase.rb
vendored
Normal file
84
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_select_phase.rb
vendored
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Phase for the "in select" insertion mode (see the spec link below).
  class InSelectPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-select

    handle_start 'html', 'option', 'optgroup', 'select'

    handle_end 'option', 'optgroup', 'select', %w( caption table tbody tfoot thead tr td th ) => 'TableElements'

    # Character data is inserted as text.
    def processCharacters(data)
      @tree.insertText(data)
    end

    # <option>: inserted after implying </option> for an open option.
    def startTagOption(name, attributes)
      # We need to imply </option> if <option> is the current node.
      @tree.openElements.pop if @tree.openElements[-1].name == 'option'
      @tree.insertElement(name, attributes)
    end

    # <optgroup>: inserted after implying </option> and </optgroup> for an
    # open option and/or optgroup.
    def startTagOptgroup(name, attributes)
      @tree.openElements.pop if @tree.openElements[-1].name == 'option'
      @tree.openElements.pop if @tree.openElements[-1].name == 'optgroup'
      @tree.insertElement(name, attributes)
    end

    # A nested <select> is a parse error and is treated as </select>.
    def startTagSelect(name, attributes)
      @parser.parseError(_('Unexpected start tag (select) in the select phase implies select start tag.'))
      endTagSelect('select')
    end

    # Any other start tag is a parse error and is ignored.
    def startTagOther(name, attributes)
      # Double quotes are required so that the tag name is interpolated into
      # the message.
      @parser.parseError(_("Unexpected start tag token (#{name}) in the select phase. Ignored."))
    end

    # </option>: pops the current node if it is an option, otherwise reports
    # a parse error.
    def endTagOption(name)
      if @tree.openElements[-1].name == 'option'
        @tree.openElements.pop
      else
        @parser.parseError(_('Unexpected end tag (option) in the select phase. Ignored.'))
      end
    end

    # </optgroup>
    def endTagOptgroup(name)
      # </optgroup> implicitly closes <option>
      if @tree.openElements[-1].name == 'option' && @tree.openElements[-2].name == 'optgroup'
        @tree.openElements.pop
      end
      # It also closes </optgroup>
      if @tree.openElements[-1].name == 'optgroup'
        @tree.openElements.pop
      # But nothing else
      else
        @parser.parseError(_('Unexpected end tag (optgroup) in the select phase. Ignored.'))
      end
    end

    # </select>: when a select is in table scope, pops open elements up to
    # and including it and resets the insertion mode; otherwise only a parse
    # error is reported.
    def endTagSelect(name)
      if in_scope?('select', true)
        remove_open_elements_until('select')

        @parser.resetInsertionMode
      else
        # innerHTML case
        @parser.parseError
      end
    end

    # End tags mapped to 'TableElements' above: always a parse error; when
    # the named element is in table scope the select is closed and the end
    # tag is reprocessed by the new current phase.
    def endTagTableElements(name)
      @parser.parseError(_("Unexpected table end tag (#{name}) in the select phase."))

      if in_scope?(name, true)
        endTagSelect('select')
        @parser.phase.processEndTag(name)
      end
    end

    # Any other end tag is a parse error and is ignored.
    def endTagOther(name)
      @parser.parseError(_("Unexpected end tag token (#{name}) in the select phase. Ignored."))
    end

  end
end
|
83
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_body_phase.rb
vendored
Normal file
83
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_body_phase.rb
vendored
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Phase for the "in table body" insertion mode (see the spec link below).
  class InTableBodyPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-table0

    handle_start 'html', 'tr', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead ) => 'TableOther'

    # The ignored end tags must map to 'Ignore' so that they dispatch to
    # endTagIgnore below.
    handle_end 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th tr ) => 'Ignore'

    # Character data is handed to the :inTable phase.
    def processCharacters(data)
      @parser.phases[:inTable].processCharacters(data)
    end

    # <tr>: clears the stack back to the row group, inserts the row and
    # switches to the :inRow phase.
    def startTagTr(name, attributes)
      clearStackToTableBodyContext
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inRow]
    end

    # <td>/<th> directly inside a row group: parse error, then a <tr> is
    # implied and the cell tag is reprocessed by the new current phase.
    def startTagTableCell(name, attributes)
      @parser.parseError(_("Unexpected table cell start tag (#{name}) in the table body phase."))
      startTagTr('tr', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # Start tags mapped to 'TableOther' above: when a row group is in table
    # scope it is closed and the tag is reprocessed; otherwise only a parse
    # error is reported.
    def startTagTableOther(name, attributes)
      # XXX AT Any ideas on how to share this with endTagTable?
      if in_scope?('tbody', true) || in_scope?('thead', true) || in_scope?('tfoot', true)
        clearStackToTableBodyContext
        endTagTableRowGroup(@tree.openElements[-1].name)
        @parser.phase.processStartTag(name, attributes)
      else
        # innerHTML case
        @parser.parseError
      end
    end

    # Any other start tag is handed to the :inTable phase.
    def startTagOther(name, attributes)
      @parser.phases[:inTable].processStartTag(name, attributes)
    end

    # </tbody>, </tfoot>, </thead>: when the named element is in table scope,
    # clears the stack back to the row group, pops it and switches to the
    # :inTable phase; otherwise the tag is reported and ignored.
    def endTagTableRowGroup(name)
      if in_scope?(name, true)
        clearStackToTableBodyContext
        @tree.openElements.pop
        @parser.phase = @parser.phases[:inTable]
      else
        @parser.parseError(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
      end
    end

    # </table>: when a row group is in table scope it is closed and the end
    # tag is reprocessed; otherwise only a parse error is reported.
    def endTagTable(name)
      if in_scope?('tbody', true) || in_scope?('thead', true) || in_scope?('tfoot', true)
        clearStackToTableBodyContext
        endTagTableRowGroup(@tree.openElements[-1].name)
        @parser.phase.processEndTag(name)
      else
        # innerHTML case
        @parser.parseError
      end
    end

    # End tags mapped to 'Ignore' above: parse error only.
    def endTagIgnore(name)
      @parser.parseError(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
    end

    # Any other end tag is handed to the :inTable phase.
    def endTagOther(name)
      @parser.phases[:inTable].processEndTag(name)
    end

    protected

    # Pops open elements (reporting each as a parse error) until the current
    # node is tbody, tfoot, thead or html.
    def clearStackToTableBodyContext
      until ['tbody', 'tfoot', 'thead', 'html'].include?(name = @tree.openElements[-1].name)
        @parser.parseError(_("Unexpected implied end tag (#{name}) in the table body phase."))
        @tree.openElements.pop
      end
    end

  end
end
|
110
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_phase.rb
vendored
Normal file
110
vendor/plugins/HTML5lib/lib/html5lib/html5parser/in_table_phase.rb
vendored
Normal file
|
@ -0,0 +1,110 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Phase for the "in table" insertion mode (see the spec link below).
  class InTablePhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-table

    handle_start 'html', 'caption', 'colgroup', 'col', 'table'

    handle_start %w( tbody tfoot thead ) => 'RowGroup', %w( td th tr ) => 'ImplyTbody'

    handle_end 'table', %w( body caption col colgroup html tbody td tfoot th thead tr ) => 'Ignore'

    # Character data in table context: parse error, then the data is
    # processed by the :inBody phase with the tree's insertFromTable flag set.
    def processCharacters(data)
      @parser.parseError(_("Unexpected non-space characters in table context caused voodoo mode."))
      # Make all the special element rearranging voodoo kick in
      @tree.insertFromTable = true
      # Process the character in the "in body" mode
      @parser.phases[:inBody].processCharacters(data)
      @tree.insertFromTable = false
    end

    # <caption>: clears the stack back to the table, pushes a Marker onto the
    # active formatting elements, inserts the caption and switches to the
    # :inCaption phase.
    def startTagCaption(name, attributes)
      clearStackToTableContext
      @tree.activeFormattingElements.push(Marker)
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inCaption]
    end

    # <colgroup>: clears the stack back to the table, inserts the element and
    # switches to the :inColumnGroup phase.
    def startTagColgroup(name, attributes)
      clearStackToTableContext
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inColumnGroup]
    end

    # <col> implies a <colgroup>; the col tag is then reprocessed.
    def startTagCol(name, attributes)
      startTagColgroup('colgroup', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # <tbody>, <tfoot>, <thead>: clears the stack back to the table, inserts
    # the element and switches to the :inTableBody phase.
    def startTagRowGroup(name, attributes)
      clearStackToTableContext
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inTableBody]
    end

    # <td>, <th>, <tr> imply a <tbody>; the tag is then reprocessed.
    def startTagImplyTbody(name, attributes)
      startTagRowGroup('tbody', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # A nested <table> is a parse error that implies </table>; the start tag
    # is then reprocessed unless the parser is in innerHTML mode.
    def startTagTable(name, attributes)
      @parser.parseError(_("Unexpected start tag (table) in table phase. Implies end tag (table)."))
      @parser.phase.processEndTag('table')
      @parser.phase.processStartTag(name, attributes) unless @parser.innerHTML
    end

    # Any other start tag: parse error, then the tag is processed by the
    # :inBody phase with the tree's insertFromTable flag set.
    def startTagOther(name, attributes)
      @parser.parseError(_("Unexpected start tag (#{name}) in table context caused voodoo mode."))
      # Make all the special element rearranging voodoo kick in
      @tree.insertFromTable = true
      # Process the start tag in the "in body" mode
      @parser.phases[:inBody].processStartTag(name, attributes)
      @tree.insertFromTable = false
    end

    # </table>: when a table is in table scope, generates implied end tags,
    # pops open elements up to and including the table (with a parse error if
    # it was not the current node) and resets the insertion mode; otherwise
    # only a parse error is reported.
    def endTagTable(name)
      if in_scope?('table', true)
        @tree.generateImpliedEndTags

        unless @tree.openElements[-1].name == 'table'
          @parser.parseError(_("Unexpected end tag (table). Expected end tag (#{@tree.openElements[-1].name})."))
        end

        remove_open_elements_until('table')

        @parser.resetInsertionMode
      else
        # innerHTML case
        assert @parser.innerHTML
        @parser.parseError
      end
    end

    # End tags mapped to 'Ignore' above: parse error only.
    def endTagIgnore(name)
      @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
    end

    # Any other end tag: parse error, then the tag is processed by the
    # :inBody phase with the tree's insertFromTable flag set. The flag is set
    # on @tree, the same object processCharacters and startTagOther use.
    def endTagOther(name)
      @parser.parseError(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
      # Make all the special element rearranging voodoo kick in
      @tree.insertFromTable = true
      # Process the end tag in the "in body" mode
      @parser.phases[:inBody].processEndTag(name)
      @tree.insertFromTable = false
    end

    protected

    # Pops open elements (reporting each as a parse error) until the current
    # node is table or html.
    def clearStackToTableContext
      # "clear the stack back to a table context"
      until ['table', 'html'].include?(name = @tree.openElements[-1].name)
        @parser.parseError(_("Unexpected implied end tag (#{name}) in the table phase."))
        @tree.openElements.pop
      end
      # When the current node is <html> it's an innerHTML case
    end

  end
end
|
49
vendor/plugins/HTML5lib/lib/html5lib/html5parser/initial_phase.rb
vendored
Normal file
49
vendor/plugins/HTML5lib/lib/html5lib/html5parser/initial_phase.rb
vendored
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  class InitialPhase < Phase

    # This phase deals with error handling as well which is currently not
    # covered in the specification. The error handling is typically known as
    # "quirks mode". It is expected that a future version of HTML5 will define this.

    # EOF before any DOCTYPE: parse error, then the :rootElement phase takes
    # over and receives the EOF.
    def processEOF
      message = _('Unexpected End of file. Expected DOCTYPE.')
      advance_to_root_element(message).processEOF
    end

    # Comments are attached to the document itself.
    def processComment(data)
      document = @tree.document
      @tree.insertComment(data, document)
    end

    # DOCTYPE: reported when flagged as erroneous, inserted either way, and
    # the parser moves on to the :rootElement phase.
    def processDoctype(name, error)
      if error
        @parser.parseError(_('Erroneous DOCTYPE.'))
      end
      @tree.insertDoctype(name)
      @parser.phase = @parser.phases[:rootElement]
    end

    # Whitespace is attached to the document itself.
    def processSpaceCharacters(data)
      document = @tree.document
      @tree.insertText(data, document)
    end

    # Non-space characters before any DOCTYPE: parse error, then the
    # :rootElement phase takes over and receives the characters.
    def processCharacters(data)
      message = _('Unexpected non-space characters. Expected DOCTYPE.')
      advance_to_root_element(message).processCharacters(data)
    end

    # Start tag before any DOCTYPE: parse error, then the :rootElement phase
    # takes over and receives the tag.
    def processStartTag(name, attributes)
      message = _("Unexpected start tag (#{name}). Expected DOCTYPE.")
      advance_to_root_element(message).processStartTag(name, attributes)
    end

    # End tag before any DOCTYPE: parse error, then the :rootElement phase
    # takes over and receives the tag.
    def processEndTag(name)
      message = _("Unexpected end tag (#{name}). Expected DOCTYPE.")
      advance_to_root_element(message).processEndTag(name)
    end

    private

    # Reports message as a parse error, switches the parser to its
    # :rootElement phase and returns the parser's current phase as read back
    # after the switch.
    def advance_to_root_element(message)
      @parser.parseError(message)
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase
    end

  end
end
|
156
vendor/plugins/HTML5lib/lib/html5lib/html5parser/phase.rb
vendored
Normal file
156
vendor/plugins/HTML5lib/lib/html5lib/html5parser/phase.rb
vendored
Normal file
|
@ -0,0 +1,156 @@
|
||||||
|
module HTML5lib
|
||||||
|
# Base class for helper objects that implement each phase of processing.
|
||||||
|
#
|
||||||
|
# Handler methods should be in the following order (they can be omitted):
|
||||||
|
#
|
||||||
|
# * EOF
|
||||||
|
# * Comment
|
||||||
|
# * Doctype
|
||||||
|
# * SpaceCharacters
|
||||||
|
# * Characters
|
||||||
|
# * StartTag
|
||||||
|
# - startTag* methods
|
||||||
|
# * EndTag
|
||||||
|
# - endTag* methods
|
||||||
|
#
|
||||||
|
class Phase
|
||||||
|
|
||||||
|
# The following example call:
|
||||||
|
#
|
||||||
|
# tag_handlers('startTag', 'html', %( base link meta ), %( li dt dd ) => 'ListItem')
|
||||||
|
#
|
||||||
|
# ...would return a hash equal to this:
|
||||||
|
#
|
||||||
|
# { 'html' => 'startTagHtml',
|
||||||
|
# 'base' => 'startTagBaseLinkMeta',
|
||||||
|
# 'link' => 'startTagBaseLinkMeta',
|
||||||
|
# 'meta' => 'startTagBaseLinkMeta',
|
||||||
|
# 'li' => 'startTagListItem',
|
||||||
|
# 'dt' => 'startTagListItem',
|
||||||
|
# 'dd' => 'startTagListItem' }
|
||||||
|
#
|
||||||
|
def self.tag_handlers(prefix, *tags)
|
||||||
|
mapping = {}
|
||||||
|
if tags.last.is_a?(Hash)
|
||||||
|
tags.pop.each do |names, handler_method_suffix|
|
||||||
|
handler_method = prefix + handler_method_suffix
|
||||||
|
Array(names).each { |name| mapping[name] = handler_method }
|
||||||
|
end
|
||||||
|
end
|
||||||
|
tags.each do |names|
|
||||||
|
names = Array(names)
|
||||||
|
handler_method = prefix + names.map { |name| name.capitalize }.join
|
||||||
|
names.each { |name| mapping[name] = handler_method }
|
||||||
|
end
|
||||||
|
return mapping
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.start_tag_handlers
|
||||||
|
@start_tag_handlers ||= Hash.new('startTagOther')
|
||||||
|
end
|
||||||
|
|
||||||
|
# Declare what start tags this Phase handles. Can be called more than once.
|
||||||
|
#
|
||||||
|
# Example usage:
|
||||||
|
#
|
||||||
|
# handle_start 'html'
|
||||||
|
# # html start tags will be handled by a method named 'startTagHtml'
|
||||||
|
#
|
||||||
|
# handle_start %( base link meta )
|
||||||
|
# # base, link and meta start tags will be handled by a method named 'startTagBaseLinkMeta'
|
||||||
|
#
|
||||||
|
# handle_start %( li dt dd ) => 'ListItem'
|
||||||
|
# # li, dt, and dd start tags will be handled by a method named 'startTagListItem'
|
||||||
|
#
|
||||||
|
def self.handle_start(*tags)
|
||||||
|
start_tag_handlers.update tag_handlers('startTag', *tags)
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.end_tag_handlers
|
||||||
|
@end_tag_handlers ||= Hash.new('endTagOther')
|
||||||
|
end
|
||||||
|
|
||||||
|
# Declare what end tags this Phase handles. Behaves like handle_start.
|
||||||
|
#
|
||||||
|
def self.handle_end(*tags)
|
||||||
|
end_tag_handlers.update tag_handlers('endTag', *tags)
|
||||||
|
end
|
||||||
|
|
||||||
|
def initialize(parser, tree)
|
||||||
|
@parser, @tree = parser, tree
|
||||||
|
end
|
||||||
|
|
||||||
|
def processEOF
|
||||||
|
@tree.generateImpliedEndTags
|
||||||
|
|
||||||
|
if @tree.openElements.length > 2
|
||||||
|
@parser.parseError(_('Unexpected end of file. Missing closing tags.'))
|
||||||
|
elsif @tree.openElements.length == 2 and @tree.openElements[1].name != 'body'
|
||||||
|
# This happens for framesets or something?
|
||||||
|
@parser.parseError(_("Unexpected end of file. Expected end tag (#{@tree.openElements[1].name}) first."))
|
||||||
|
elsif @parser.innerHTML and @tree.openElements.length > 1
|
||||||
|
# XXX This is not what the specification says. Not sure what to do here.
|
||||||
|
@parser.parseError(_('XXX innerHTML EOF'))
|
||||||
|
end
|
||||||
|
# Betting ends.
|
||||||
|
end
|
||||||
|
|
||||||
|
def processComment(data)
|
||||||
|
# For most phases the following is correct. Where it's not it will be
|
||||||
|
# overridden.
|
||||||
|
@tree.insertComment(data, @tree.openElements[-1])
|
||||||
|
end
|
||||||
|
|
||||||
|
def processDoctype(name, error)
|
||||||
|
@parser.parseError(_('Unexpected DOCTYPE. Ignored.'))
|
||||||
|
end
|
||||||
|
|
||||||
|
def processSpaceCharacters(data)
|
||||||
|
@tree.insertText(data)
|
||||||
|
end
|
||||||
|
|
||||||
|
def processStartTag(name, attributes)
|
||||||
|
send self.class.start_tag_handlers[name], name, attributes
|
||||||
|
end
|
||||||
|
|
||||||
|
# Handles an <html> start tag seen while a root element already exists.
# Reports a parse error when the parser's firstStartTag flag is false,
# copies onto the first open element only those attributes it does not
# already have, then clears the firstStartTag flag.
def startTagHtml(name, attributes)
  misplaced = (@parser.firstStartTag == false && name == 'html')
  @parser.parseError(_('html needs to be the first start tag.')) if misplaced

  # XXX Need a check here to see if the first start tag token emitted is
  # this token... If it's not, invoke @parser.parseError.
  attributes.each do |attr, value|
    root_attributes = @tree.openElements[0].attributes
    next if root_attributes.has_key?(attr)

    root_attributes[attr] = value
  end

  @parser.firstStartTag = false
end
|
||||||
|
|
||||||
|
# Dispatches an end tag: the handler method name is looked up by tag name
# in the class-level end_tag_handlers table and invoked with the tag name.
def processEndTag(name)
  handler = self.class.end_tag_handlers[name]
  send(handler, name)
end
|
||||||
|
|
||||||
|
# Wrapper applied to every parse error message before it is reported.
# It currently hands the message back untouched.
def _(string)
  string
end
|
||||||
|
|
||||||
|
# Raises AssertionError unless +value+ is truthy; returns nil otherwise.
#
# This previously used +throw+, which is Ruby's catch/throw non-local
# jump rather than its exception mechanism, so a failed assertion never
# surfaced as an AssertionError. AssertionError itself is defined outside
# this method.
def assert(value)
  raise AssertionError unless value
end
|
||||||
|
|
||||||
|
# Shorthand for @tree.elementInScope; all arguments are forwarded
# unchanged and its result is returned.
def in_scope?(*args)
  tree = @tree
  tree.elementInScope(*args)
end
|
||||||
|
|
||||||
|
# Pops entries off @tree.openElements until the stop condition holds and
# returns the entry that satisfied it. With a +name+, popping stops at the
# first popped element whose name equals it; without one, each popped
# element is yielded to the block and popping stops when the block returns
# a truthy value.
#
# NOTE(review): there is no guard for running out of open elements.
def remove_open_elements_until(name=nil)
  element = @tree.openElements.pop
  until (name.nil? ? yield(element) : element.name == name)
    element = @tree.openElements.pop
  end
  element
end
|
||||||
|
|
||||||
|
end
|
||||||
|
end
|
43
vendor/plugins/HTML5lib/lib/html5lib/html5parser/root_element_phase.rb
vendored
Normal file
43
vendor/plugins/HTML5lib/lib/html5lib/html5parser/root_element_phase.rb
vendored
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Phase in effect before a root element exists. Comments and space
  # characters are attached to the document itself; every other token
  # first creates the <html> element (which also switches the parser to
  # its :beforeHead phase) and is then handed to the parser's new current
  # phase.
  class RootElementPhase < Phase

    # End of input: create the root element, then let the new phase see EOF.
    def processEOF
      insertHtmlElement
      following = @parser.phase
      following.processEOF
    end

    # Comments seen here become children of the document.
    def processComment(data)
      document = @tree.document
      @tree.insertComment(data, document)
    end

    # Space characters seen here are inserted with the document as parent.
    def processSpaceCharacters(data)
      document = @tree.document
      @tree.insertText(data, document)
    end

    # Other characters imply the root element; reprocess them afterwards.
    def processCharacters(data)
      insertHtmlElement
      following = @parser.phase
      following.processCharacters(data)
    end

    # Any start tag implies the root element. An explicit <html> tag also
    # sets the parser's firstStartTag flag before the tag is reprocessed.
    def processStartTag(name, attributes)
      if name == 'html'
        @parser.firstStartTag = true
      end
      insertHtmlElement
      following = @parser.phase
      following.processStartTag(name, attributes)
    end

    # Any end tag implies the root element; reprocess the tag afterwards.
    def processEndTag(name)
      insertHtmlElement
      following = @parser.phase
      following.processEndTag(name)
    end

    # Creates an attribute-less <html> element, pushes it onto the stack of
    # open elements, appends it to the document and moves the parser to
    # the :beforeHead phase.
    def insertHtmlElement
      root = @tree.createElement('html', {})
      @tree.openElements.push(root)
      @tree.document.appendChild(root)
      @parser.phase = @parser.phases[:beforeHead]
    end

  end
end
|
36
vendor/plugins/HTML5lib/lib/html5lib/html5parser/trailing_end_phase.rb
vendored
Normal file
36
vendor/plugins/HTML5lib/lib/html5lib/html5parser/trailing_end_phase.rb
vendored
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
require 'html5lib/html5parser/phase'
|
||||||
|
|
||||||
|
module HTML5lib
  # Phase in which only end of file, comments and space characters are
  # expected. Any other token is reported as a parse error, after which the
  # parser is switched back to its lastPhase and the token is reprocessed
  # there.
  class TrailingEndPhase < Phase

    # End of file is what this phase expects, so nothing is done.
    def processEOF
    end

    # Comments seen here become children of the document.
    def processComment(data)
      @tree.insertComment(data, @tree.document)
    end

    # Space characters are handled by the parser's lastPhase; the current
    # phase is left unchanged.
    def processSpaceCharacters(data)
      @parser.lastPhase.processSpaceCharacters(data)
    end

    # Non-space characters: report, fall back to lastPhase, reprocess.
    def processCharacters(data)
      @parser.parseError(_('Unexpected non-space characters. Expected end of file.'))
      @parser.phase = @parser.lastPhase
      @parser.phase.processCharacters(data)
    end

    # Start tag: report, fall back to lastPhase, reprocess.
    def processStartTag(name, attributes)
      # Double quotes are required here so the tag name is interpolated
      # into the message.
      @parser.parseError(_("Unexpected start tag (#{name}). Expected end of file."))
      @parser.phase = @parser.lastPhase
      @parser.phase.processStartTag(name, attributes)
    end

    # End tag: report, fall back to lastPhase, reprocess.
    def processEndTag(name)
      # Double quotes are required here so the tag name is interpolated
      # into the message.
      @parser.parseError(_("Unexpected end tag (#{name}). Expected end of file."))
      @parser.phase = @parser.lastPhase
      @parser.phase.processEndTag(name)
    end

  end
end
|
799
vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb
vendored
799
vendor/plugins/HTML5lib/lib/html5lib/inputstream.rb
vendored
|
@ -3,14 +3,14 @@ require 'html5lib/constants'
|
||||||
|
|
||||||
module HTML5lib
|
module HTML5lib
|
||||||
|
|
||||||
# Provides a unicode stream of characters to the HTMLTokenizer.
|
# Provides a unicode stream of characters to the HTMLTokenizer.
|
||||||
|
|
||||||
# This class takes care of character encoding and removing or replacing
|
# This class takes care of character encoding and removing or replacing
|
||||||
# incorrect byte-sequences and also provides column and line tracking.
|
# incorrect byte-sequences and also provides column and line tracking.
|
||||||
|
|
||||||
class HTMLInputStream
|
class HTMLInputStream
|
||||||
|
|
||||||
attr_accessor :queue, :charEncoding
|
attr_accessor :queue, :char_encoding
|
||||||
|
|
||||||
# Initialises the HTMLInputStream.
|
# Initialises the HTMLInputStream.
|
||||||
#
|
#
|
||||||
|
@ -27,523 +27,524 @@ class HTMLInputStream
|
||||||
# parseMeta - Look for a <meta> element containing encoding information
|
# parseMeta - Look for a <meta> element containing encoding information
|
||||||
|
|
||||||
def initialize(source, options = {})
|
def initialize(source, options = {})
|
||||||
@encoding = nil
|
@encoding = nil
|
||||||
@parseMeta = true
|
@parse_meta = true
|
||||||
@chardet = true
|
@chardet = true
|
||||||
|
|
||||||
options.each { |name, value| instance_variable_set("@#{name}", value) }
|
options.each { |name, value| instance_variable_set("@#{name}", value) }
|
||||||
|
|
||||||
# List of where new lines occur
|
# List of where new lines occur
|
||||||
@newLines = []
|
@new_lines = []
|
||||||
|
|
||||||
# Raw Stream
|
# Raw Stream
|
||||||
@rawStream = openStream(source)
|
@raw_stream = open_stream(source)
|
||||||
|
|
||||||
# Encoding Information
|
# Encoding Information
|
||||||
#Number of bytes to use when looking for a meta element with
|
#Number of bytes to use when looking for a meta element with
|
||||||
#encoding information
|
#encoding information
|
||||||
@NUM_BYTES_META = 512
|
@NUM_BYTES_META = 512
|
||||||
#Encoding to use if no other information can be found
|
#Encoding to use if no other information can be found
|
||||||
@DEFAULT_ENCODING = 'windows-1252'
|
@DEFAULT_ENCODING = 'windows-1252'
|
||||||
|
|
||||||
#Detect encoding iff no explicit "transport level" encoding is supplied
|
#Detect encoding iff no explicit "transport level" encoding is supplied
|
||||||
if @encoding.nil? or not HTML5lib.isValidEncoding(@encoding)
|
if @encoding.nil? or not HTML5lib.is_valid_encoding(@encoding)
|
||||||
@charEncoding = detectEncoding
|
@char_encoding = detect_encoding
|
||||||
else
|
else
|
||||||
@charEncoding = @encoding
|
@char_encoding = @encoding
|
||||||
|
end
|
||||||
|
|
||||||
|
# Read bytes from stream decoding them into Unicode
|
||||||
|
uString = @raw_stream.read
|
||||||
|
unless @char_encoding == 'utf-8'
|
||||||
|
begin
|
||||||
|
require 'iconv'
|
||||||
|
uString = Iconv.iconv('utf-8', @encoding, uString)[0]
|
||||||
|
rescue
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
# Read bytes from stream decoding them into Unicode
|
# Normalize newlines and null characters
|
||||||
uString = @rawStream.read
|
uString.gsub!(/\r\n?/, "\n")
|
||||||
unless @charEncoding == 'utf-8'
|
uString.gsub!("\x00", [0xFFFD].pack('U'))
|
||||||
begin
|
|
||||||
require 'iconv'
|
|
||||||
uString = Iconv.iconv('utf-8', @encoding, uString)[0]
|
|
||||||
rescue
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# Normalize newlines and null characters
|
# Convert the unicode string into a list to be used as the data stream
|
||||||
uString.gsub!(/\r\n?/, "\n")
|
@data_stream = uString
|
||||||
uString.gsub!("\x00", [0xFFFD].pack('U'))
|
|
||||||
|
|
||||||
# Convert the unicode string into a list to be used as the data stream
|
@queue = []
|
||||||
@dataStream = uString
|
|
||||||
|
|
||||||
@queue = []
|
# Reset position in the list to read from
|
||||||
|
reset
|
||||||
# Reset position in the list to read from
|
|
||||||
reset
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Produces a file object from source.
|
# Produces a file object from source.
|
||||||
#
|
#
|
||||||
# source can be either a file object, local filename or a string.
|
# source can be either a file object, local filename or a string.
|
||||||
def openStream(source)
|
def open_stream(source)
|
||||||
# Already an IO like object
|
# Already an IO like object
|
||||||
if source.respond_to?(:read)
|
if source.respond_to?(:read)
|
||||||
@stream = source
|
@stream = source
|
||||||
else
|
else
|
||||||
# Treat source as a string and wrap in StringIO
|
# Treat source as a string and wrap in StringIO
|
||||||
@stream = StringIO.new(source)
|
@stream = StringIO.new(source)
|
||||||
end
|
end
|
||||||
return @stream
|
return @stream
|
||||||
end
|
end
|
||||||
|
|
||||||
def detectEncoding
|
def detect_encoding
|
||||||
|
|
||||||
#First look for a BOM
|
#First look for a BOM
|
||||||
#This will also read past the BOM if present
|
#This will also read past the BOM if present
|
||||||
encoding = detectBOM
|
encoding = detect_bom
|
||||||
#If there is no BOM need to look for meta elements with encoding
|
#If there is no BOM need to look for meta elements with encoding
|
||||||
#information
|
#information
|
||||||
if encoding.nil? and @parseMeta
|
if encoding.nil? and @parse_meta
|
||||||
encoding = detectEncodingMeta
|
encoding = detect_encoding_meta
|
||||||
|
end
|
||||||
|
#Guess with chardet, if avaliable
|
||||||
|
if encoding.nil? and @chardet
|
||||||
|
begin
|
||||||
|
require 'rubygems'
|
||||||
|
require 'UniversalDetector' # gem install chardet
|
||||||
|
buffer = @raw_stream.read
|
||||||
|
encoding = UniversalDetector::chardet(buffer)['encoding']
|
||||||
|
@raw_stream = open_stream(buffer)
|
||||||
|
rescue LoadError
|
||||||
end
|
end
|
||||||
#Guess with chardet, if avaliable
|
end
|
||||||
if encoding.nil? and @chardet
|
# If all else fails use the default encoding
|
||||||
begin
|
if encoding.nil?
|
||||||
require 'rubygems'
|
encoding = @DEFAULT_ENCODING
|
||||||
require 'UniversalDetector' # gem install chardet
|
end
|
||||||
buffer = @rawStream.read
|
|
||||||
encoding = UniversalDetector::chardet(buffer)['encoding']
|
#Substitute for equivalent encodings:
|
||||||
@rawStream = openStream(buffer)
|
encoding_sub = {'ascii' => 'windows-1252', 'iso-8859-1' => 'windows-1252'}
|
||||||
rescue LoadError
|
|
||||||
end
|
|
||||||
end
|
|
||||||
# If all else fails use the default encoding
|
|
||||||
if encoding.nil?
|
|
||||||
encoding = @DEFAULT_ENCODING
|
|
||||||
end
|
|
||||||
|
|
||||||
#Substitute for equivalent encodings:
|
|
||||||
encodingSub = {'ascii' => 'windows-1252', 'iso-8859-1' => 'windows-1252'}
|
|
||||||
|
|
||||||
if encodingSub.has_key?(encoding.downcase)
|
if encoding_sub.has_key?(encoding.downcase)
|
||||||
encoding = encodingSub[encoding.downcase]
|
encoding = encoding_sub[encoding.downcase]
|
||||||
end
|
end
|
||||||
|
|
||||||
return encoding
|
return encoding
|
||||||
end
|
end
|
||||||
|
|
||||||
# Attempts to detect a BOM at the start of the stream. If
|
# Attempts to detect a BOM at the start of the stream. If
|
||||||
# an encoding can be determined from the BOM return the name of the
|
# an encoding can be determined from the BOM return the name of the
|
||||||
# encoding otherwise return nil
|
# encoding otherwise return nil
|
||||||
def detectBOM
|
def detect_bom
|
||||||
bomDict = {
|
bom_dict = {
|
||||||
"\xef\xbb\xbf" => 'utf-8',
|
"\xef\xbb\xbf" => 'utf-8',
|
||||||
"\xff\xfe" => 'utf-16-le',
|
"\xff\xfe" => 'utf-16-le',
|
||||||
"\xfe\xff" => 'utf-16-be',
|
"\xfe\xff" => 'utf-16-be',
|
||||||
"\xff\xfe\x00\x00" => 'utf-32-le',
|
"\xff\xfe\x00\x00" => 'utf-32-le',
|
||||||
"\x00\x00\xfe\xff" => 'utf-32-be'
|
"\x00\x00\xfe\xff" => 'utf-32-be'
|
||||||
}
|
}
|
||||||
|
|
||||||
# Go to beginning of file and read in 4 bytes
|
# Go to beginning of file and read in 4 bytes
|
||||||
@rawStream.seek(0)
|
@raw_stream.seek(0)
|
||||||
string = @rawStream.read(4)
|
string = @raw_stream.read(4)
|
||||||
return nil unless string
|
return nil unless string
|
||||||
|
|
||||||
# Try detecting the BOM using bytes from the string
|
# Try detecting the BOM using bytes from the string
|
||||||
encoding = bomDict[string[0...3]] # UTF-8
|
encoding = bom_dict[string[0...3]] # UTF-8
|
||||||
seek = 3
|
seek = 3
|
||||||
|
unless encoding
|
||||||
|
# Need to detect UTF-32 before UTF-16
|
||||||
|
encoding = bom_dict[string] # UTF-32
|
||||||
|
seek = 4
|
||||||
unless encoding
|
unless encoding
|
||||||
# Need to detect UTF-32 before UTF-16
|
encoding = bom_dict[string[0...2]] # UTF-16
|
||||||
encoding = bomDict[string] # UTF-32
|
seek = 2
|
||||||
seek = 4
|
|
||||||
unless encoding
|
|
||||||
encoding = bomDict[string[0...2]] # UTF-16
|
|
||||||
seek = 2
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
#AT - move this to the caller?
|
#AT - move this to the caller?
|
||||||
# Set the read position past the BOM if one was found, otherwise
|
# Set the read position past the BOM if one was found, otherwise
|
||||||
# set it to the start of the stream
|
# set it to the start of the stream
|
||||||
@rawStream.seek(encoding ? seek : 0)
|
@raw_stream.seek(encoding ? seek : 0)
|
||||||
|
|
||||||
return encoding
|
return encoding
|
||||||
end
|
end
|
||||||
|
|
||||||
# Report the encoding declared by the meta element
|
# Report the encoding declared by the meta element
|
||||||
def detectEncodingMeta
|
def detect_encoding_meta
|
||||||
parser = EncodingParser.new(@rawStream.read(@NUM_BYTES_META))
|
parser = EncodingParser.new(@raw_stream.read(@NUM_BYTES_META))
|
||||||
@rawStream.seek(0)
|
@raw_stream.seek(0)
|
||||||
return parser.getEncoding
|
return parser.get_encoding
|
||||||
end
|
end
|
||||||
|
|
||||||
def determineNewLines
|
def determine_new_lines
|
||||||
# Looks through the stream to find where new lines occur so
|
# Looks through the stream to find where new lines occur so
|
||||||
# the position method can tell where it is.
|
# the position method can tell where it is.
|
||||||
@newLines.push(0)
|
@new_lines.push(0)
|
||||||
(0...@dataStream.length).each { |i| @newLines.push(i) if @dataStream[i] == ?\n }
|
(0...@data_stream.length).each { |i| @new_lines.push(i) if @data_stream[i] == ?\n }
|
||||||
end
|
end
|
||||||
|
|
||||||
# Returns (line, col) of the current position in the stream.
|
# Returns (line, col) of the current position in the stream.
|
||||||
def position
|
def position
|
||||||
# Generate list of new lines first time around
|
# Generate list of new lines first time around
|
||||||
determineNewLines if @newLines.empty?
|
determine_new_lines if @new_lines.empty?
|
||||||
line = 0
|
line = 0
|
||||||
tell = @tell
|
tell = @tell
|
||||||
@newLines.each do |pos|
|
@new_lines.each do |pos|
|
||||||
break unless pos < tell
|
break unless pos < tell
|
||||||
line += 1
|
line += 1
|
||||||
end
|
end
|
||||||
col = tell - @newLines[line-1] - 1
|
col = tell - @new_lines[line-1] - 1
|
||||||
return [line, col]
|
return [line, col]
|
||||||
end
|
end
|
||||||
|
|
||||||
# Resets the position in the stream back to the start.
|
# Resets the position in the stream back to the start.
|
||||||
def reset
|
def reset
|
||||||
@tell = 0
|
@tell = 0
|
||||||
end
|
end
|
||||||
|
|
||||||
# Read one character from the stream or queue if available. Return
|
# Read one character from the stream or queue if available. Return
|
||||||
# EOF when EOF is reached.
|
# EOF when EOF is reached.
|
||||||
def char
|
def char
|
||||||
unless @queue.empty?
|
unless @queue.empty?
|
||||||
return @queue.shift
|
return @queue.shift
|
||||||
else
|
else
|
||||||
begin
|
begin
|
||||||
@tell += 1
|
@tell += 1
|
||||||
return @dataStream[@tell - 1].chr
|
return @data_stream[@tell - 1].chr
|
||||||
rescue
|
rescue
|
||||||
return :EOF
|
return :EOF
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Returns a string of characters from the stream up to but not
|
# Returns a string of characters from the stream up to but not
|
||||||
# including any character in characters or EOF. characters can be
|
# including any character in characters or EOF. characters can be
|
||||||
# any container that supports the in method being called on it.
|
# any container that supports the in method being called on it.
|
||||||
def charsUntil(characters, opposite = false)
|
def chars_until(characters, opposite=false)
|
||||||
charStack = [char]
|
char_stack = [char]
|
||||||
|
|
||||||
unless charStack[0] == :EOF
|
unless char_stack[0] == :EOF
|
||||||
while (characters.include? charStack[-1]) == opposite
|
while (characters.include? char_stack[-1]) == opposite
|
||||||
unless @queue.empty?
|
unless @queue.empty?
|
||||||
# First from the queue
|
# First from the queue
|
||||||
charStack.push(@queue.shift)
|
char_stack.push(@queue.shift)
|
||||||
break if charStack[-1] == :EOF
|
break if char_stack[-1] == :EOF
|
||||||
else
|
else
|
||||||
# Then the rest
|
# Then the rest
|
||||||
begin
|
begin
|
||||||
charStack.push(@dataStream[@tell].chr)
|
char_stack.push(@data_stream[@tell].chr)
|
||||||
@tell += 1
|
@tell += 1
|
||||||
rescue
|
rescue
|
||||||
charStack.push(:EOF)
|
char_stack.push(:EOF)
|
||||||
break
|
break
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
# Put the character stopped on back to the front of the queue
|
# Put the character stopped on back to the front of the queue
|
||||||
# from where it came.
|
# from where it came.
|
||||||
@queue.insert(0, charStack.pop)
|
@queue.insert(0, char_stack.pop)
|
||||||
return charStack.join('')
|
return char_stack.join('')
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# String-like object with an associated position and various extra methods
|
# String-like object with an associated position and various extra methods
|
||||||
# If the position is ever greater than the string length then an exception is raised
|
# If the position is ever greater than the string length then an exception is raised
|
||||||
class EncodingBytes < String
|
class EncodingBytes < String
|
||||||
|
|
||||||
attr_accessor :position
|
attr_accessor :position
|
||||||
|
|
||||||
def initialize(value)
|
def initialize(value)
|
||||||
super(value)
|
super(value)
|
||||||
@position = -1
|
@position = -1
|
||||||
end
|
end
|
||||||
|
|
||||||
def each
|
def each
|
||||||
while @position < length
|
while @position < length
|
||||||
@position += 1
|
@position += 1
|
||||||
yield self[@position]
|
yield self[@position]
|
||||||
end
|
end
|
||||||
rescue EOF
|
rescue EOF
|
||||||
end
|
end
|
||||||
|
|
||||||
def currentByte
|
def current_byte
|
||||||
raise EOF if @position >= length
|
raise EOF if @position >= length
|
||||||
return self[@position].chr
|
return self[@position].chr
|
||||||
end
|
end
|
||||||
|
|
||||||
# Skip past a list of characters
|
# Skip past a list of characters
|
||||||
def skip(chars = SPACE_CHARACTERS)
|
def skip(chars=SPACE_CHARACTERS)
|
||||||
while chars.include?(currentByte)
|
while chars.include?(current_byte)
|
||||||
@position += 1
|
@position += 1
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Look for a sequence of bytes at the start of a string. If the bytes
|
# Look for a sequence of bytes at the start of a string. If the bytes
|
||||||
# are found return true and advance the position to the byte after the
|
# are found return true and advance the position to the byte after the
|
||||||
# match. Otherwise return false and leave the position alone
|
# match. Otherwise return false and leave the position alone
|
||||||
def matchBytes(bytes, lower = false)
|
def match_bytes(bytes, lower=false)
|
||||||
data = self[position ... position+bytes.length]
|
data = self[position ... position+bytes.length]
|
||||||
data.downcase! if lower
|
data.downcase! if lower
|
||||||
rv = (data == bytes)
|
rv = (data == bytes)
|
||||||
@position += bytes.length if rv == true
|
@position += bytes.length if rv == true
|
||||||
return rv
|
return rv
|
||||||
end
|
end
|
||||||
|
|
||||||
# Look for the next sequence of bytes matching a given sequence. If
|
# Look for the next sequence of bytes matching a given sequence. If
|
||||||
# a match is found advance the position to the last byte of the match
|
# a match is found advance the position to the last byte of the match
|
||||||
def jumpTo(bytes)
|
def jump_to(bytes)
|
||||||
newPosition = self[position .. -1].index(bytes)
|
new_position = self[position .. -1].index(bytes)
|
||||||
if newPosition
|
if new_position
|
||||||
@position += (newPosition + bytes.length-1)
|
@position += (new_position + bytes.length-1)
|
||||||
return true
|
return true
|
||||||
else
|
else
|
||||||
raise EOF
|
raise EOF
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Move the pointer so it points to the next byte in a set of possible
|
# Move the pointer so it points to the next byte in a set of possible
|
||||||
# bytes
|
# bytes
|
||||||
def findNext(byteList)
|
def find_next(byte_list)
|
||||||
until byteList.include?(currentByte)
|
until byte_list.include?(current_byte)
|
||||||
@position += 1
|
@position += 1
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Mini parser for detecting character encoding from meta elements
|
# Mini parser for detecting character encoding from meta elements
|
||||||
class EncodingParser
|
class EncodingParser
|
||||||
|
|
||||||
# string - the data to work on for encoding detection
|
# string - the data to work on for encoding detection
|
||||||
def initialize(data)
|
def initialize(data)
|
||||||
@data = EncodingBytes.new(data.to_s)
|
@data = EncodingBytes.new(data.to_s)
|
||||||
@encoding = nil
|
@encoding = nil
|
||||||
end
|
end
|
||||||
|
|
||||||
@@method_dispatch = [
|
@@method_dispatch = [
|
||||||
['<!--', :handleComment],
|
['<!--', :handle_comment],
|
||||||
['<meta', :handleMeta],
|
['<meta', :handle_meta],
|
||||||
['</', :handlePossibleEndTag],
|
['</', :handle_possible_end_tag],
|
||||||
['<!', :handleOther],
|
['<!', :handle_other],
|
||||||
['<?', :handleOther],
|
['<?', :handle_other],
|
||||||
['<', :handlePossibleStartTag]
|
['<', :handle_possible_start_tag]
|
||||||
]
|
]
|
||||||
|
|
||||||
def getEncoding
|
def get_encoding
|
||||||
@data.each do |byte|
|
@data.each do |byte|
|
||||||
keepParsing = true
|
keep_parsing = true
|
||||||
@@method_dispatch.each do |(key, method)|
|
@@method_dispatch.each do |(key, method)|
|
||||||
if @data.matchBytes(key, lower = true)
|
if @data.match_bytes(key, lower = true)
|
||||||
keepParsing = send(method)
|
keep_parsing = send(method)
|
||||||
break
|
break
|
||||||
end
|
end
|
||||||
end
|
|
||||||
break unless keepParsing
|
|
||||||
end
|
end
|
||||||
@encoding = @encoding.strip unless @encoding.nil?
|
break unless keep_parsing
|
||||||
return @encoding
|
end
|
||||||
|
@encoding = @encoding.strip unless @encoding.nil?
|
||||||
|
return @encoding
|
||||||
end
|
end
|
||||||
|
|
||||||
# Skip over comments
|
# Skip over comments
|
||||||
def handleComment
|
def handle_comment
|
||||||
return @data.jumpTo('-->')
|
return @data.jump_to('-->')
|
||||||
end
|
end
|
||||||
|
|
||||||
def handleMeta
|
def handle_meta
|
||||||
# if we have <meta not followed by a space so just keep going
|
# if we have <meta not followed by a space so just keep going
|
||||||
return true unless SPACE_CHARACTERS.include?(@data.currentByte)
|
return true unless SPACE_CHARACTERS.include?(@data.current_byte)
|
||||||
|
|
||||||
#We have a valid meta element we want to search for attributes
|
#We have a valid meta element we want to search for attributes
|
||||||
while true
|
while true
|
||||||
#Try to find the next attribute after the current position
|
#Try to find the next attribute after the current position
|
||||||
attr = getAttribute
|
attr = get_attribute
|
||||||
|
|
||||||
return true if attr.nil?
|
return true if attr.nil?
|
||||||
|
|
||||||
if attr[0] == 'charset'
|
|
||||||
tentativeEncoding = attr[1]
|
|
||||||
if HTML5lib.isValidEncoding(tentativeEncoding)
|
|
||||||
@encoding = tentativeEncoding
|
|
||||||
return false
|
|
||||||
end
|
|
||||||
elsif attr[0] == 'content'
|
|
||||||
contentParser = ContentAttrParser.new(EncodingBytes.new(attr[1]))
|
|
||||||
tentativeEncoding = contentParser.parse
|
|
||||||
if HTML5lib.isValidEncoding(tentativeEncoding)
|
|
||||||
@encoding = tentativeEncoding
|
|
||||||
return false
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def handlePossibleStartTag
|
|
||||||
return handlePossibleTag(false)
|
|
||||||
end
|
|
||||||
|
|
||||||
def handlePossibleEndTag
|
|
||||||
@data.position+=1
|
|
||||||
return handlePossibleTag(true)
|
|
||||||
end
|
|
||||||
|
|
||||||
def handlePossibleTag(endTag)
|
|
||||||
unless ASCII_LETTERS.include?(@data.currentByte)
|
|
||||||
#If the next byte is not an ascii letter either ignore this
|
|
||||||
#fragment (possible start tag case) or treat it according to
|
|
||||||
#handleOther
|
|
||||||
if endTag
|
|
||||||
@data.position -= 1
|
|
||||||
handleOther
|
|
||||||
end
|
|
||||||
return true
|
|
||||||
end
|
|
||||||
|
|
||||||
@data.findNext(SPACE_CHARACTERS + ['<', '>'])
|
if attr[0] == 'charset'
|
||||||
|
tentative_encoding = attr[1]
|
||||||
|
if HTML5lib.is_valid_encoding(tentative_encoding)
|
||||||
|
@encoding = tentative_encoding
|
||||||
|
return false
|
||||||
|
end
|
||||||
|
elsif attr[0] == 'content'
|
||||||
|
content_parser = ContentAttrParser.new(EncodingBytes.new(attr[1]))
|
||||||
|
tentative_encoding = content_parser.parse
|
||||||
|
if HTML5lib.is_valid_encoding(tentative_encoding)
|
||||||
|
@encoding = tentative_encoding
|
||||||
|
return false
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
if @data.currentByte == '<'
|
def handle_possible_start_tag
|
||||||
#return to the first step in the overall "two step" algorithm
|
return handle_possible_tag(false)
|
||||||
#reprocessing the < byte
|
end
|
||||||
@data.position -= 1
|
|
||||||
else
|
def handle_possible_end_tag
|
||||||
#Read all attributes
|
@data.position += 1
|
||||||
{} until getAttribute.nil?
|
return handle_possible_tag(true)
|
||||||
|
end
|
||||||
|
|
||||||
|
def handle_possible_tag(end_tag)
|
||||||
|
unless ASCII_LETTERS.include?(@data.current_byte)
|
||||||
|
#If the next byte is not an ascii letter either ignore this
|
||||||
|
#fragment (possible start tag case) or treat it according to
|
||||||
|
#handleOther
|
||||||
|
if end_tag
|
||||||
|
@data.position -= 1
|
||||||
|
handle_other
|
||||||
end
|
end
|
||||||
return true
|
return true
|
||||||
|
end
|
||||||
|
|
||||||
|
@data.find_next(SPACE_CHARACTERS + ['<', '>'])
|
||||||
|
|
||||||
|
if @data.current_byte == '<'
|
||||||
|
#return to the first step in the overall "two step" algorithm
|
||||||
|
#reprocessing the < byte
|
||||||
|
@data.position -= 1
|
||||||
|
else
|
||||||
|
#Read all attributes
|
||||||
|
{} until get_attribute.nil?
|
||||||
|
end
|
||||||
|
return true
|
||||||
end
|
end
|
||||||
|
|
||||||
def handleOther
|
def handle_other
|
||||||
return @data.jumpTo('>')
|
return @data.jump_to('>')
|
||||||
end
|
end
|
||||||
|
|
||||||
# Return a name,value pair for the next attribute in the stream,
|
# Return a name,value pair for the next attribute in the stream,
|
||||||
# if one is found, or nil
|
# if one is found, or nil
|
||||||
def getAttribute
|
def get_attribute
|
||||||
@data.skip(SPACE_CHARACTERS + ['/'])
|
@data.skip(SPACE_CHARACTERS + ['/'])
|
||||||
|
|
||||||
if @data.currentByte == '<'
|
if @data.current_byte == '<'
|
||||||
@data.position -= 1
|
@data.position -= 1
|
||||||
return nil
|
return nil
|
||||||
elsif @data.currentByte == '>'
|
elsif @data.current_byte == '>'
|
||||||
return nil
|
return nil
|
||||||
end
|
end
|
||||||
|
|
||||||
attrName = []
|
attr_name = []
|
||||||
attrValue = []
|
attr_value = []
|
||||||
spaceFound = false
|
space_found = false
|
||||||
#Step 5 attribute name
|
#Step 5 attribute name
|
||||||
while true
|
while true
|
||||||
if @data.currentByte == '=' and attrName:
|
if @data.current_byte == '=' and attr_name:
|
||||||
break
|
break
|
||||||
elsif SPACE_CHARACTERS.include?(@data.currentByte)
|
elsif SPACE_CHARACTERS.include?(@data.current_byte)
|
||||||
spaceFound = true
|
space_found = true
|
||||||
break
|
break
|
||||||
elsif ['/', '<', '>'].include?(@data.currentByte)
|
elsif ['/', '<', '>'].include?(@data.current_byte)
|
||||||
return [attrName.join(''), '']
|
return [attr_name.join(''), '']
|
||||||
elsif ASCII_UPPERCASE.include?(@data.currentByte)
|
elsif ASCII_UPPERCASE.include?(@data.current_byte)
|
||||||
attrName.push(@data.currentByte.downcase)
|
attr_name.push(@data.current_byte.downcase)
|
||||||
else
|
|
||||||
attrName.push(@data.currentByte)
|
|
||||||
end
|
|
||||||
#Step 6
|
|
||||||
@data.position += 1
|
|
||||||
end
|
|
||||||
#Step 7
|
|
||||||
if spaceFound
|
|
||||||
@data.skip
|
|
||||||
#Step 8
|
|
||||||
unless @data.currentByte == '='
|
|
||||||
@data.position -= 1
|
|
||||||
return [attrName.join(''), '']
|
|
||||||
end
|
|
||||||
end
|
|
||||||
#XXX need to advance position in both spaces and value case
|
|
||||||
#Step 9
|
|
||||||
@data.position += 1
|
|
||||||
#Step 10
|
|
||||||
@data.skip
|
|
||||||
#Step 11
|
|
||||||
if ["'", '"'].include?(@data.currentByte)
|
|
||||||
#11.1
|
|
||||||
quoteChar = @data.currentByte
|
|
||||||
while true
|
|
||||||
@data.position+=1
|
|
||||||
#11.3
|
|
||||||
if @data.currentByte == quoteChar
|
|
||||||
@data.position += 1
|
|
||||||
return [attrName.join(''), attrValue.join('')]
|
|
||||||
#11.4
|
|
||||||
elsif ASCII_UPPERCASE.include?(@data.currentByte)
|
|
||||||
attrValue.push(@data.currentByte.downcase)
|
|
||||||
#11.5
|
|
||||||
else
|
|
||||||
attrValue.push(@data.currentByte)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
elsif ['>', '<'].include?(@data.currentByte)
|
|
||||||
return [attrName.join(''), '']
|
|
||||||
elsif ASCII_UPPERCASE.include?(@data.currentByte)
|
|
||||||
attrValue.push(@data.currentByte.downcase)
|
|
||||||
else
|
else
|
||||||
attrValue.push(@data.currentByte)
|
attr_name.push(@data.current_byte)
|
||||||
end
|
end
|
||||||
|
#Step 6
|
||||||
|
@data.position += 1
|
||||||
|
end
|
||||||
|
#Step 7
|
||||||
|
if space_found
|
||||||
|
@data.skip
|
||||||
|
#Step 8
|
||||||
|
unless @data.current_byte == '='
|
||||||
|
@data.position -= 1
|
||||||
|
return [attr_name.join(''), '']
|
||||||
|
end
|
||||||
|
end
|
||||||
|
#XXX need to advance position in both spaces and value case
|
||||||
|
#Step 9
|
||||||
|
@data.position += 1
|
||||||
|
#Step 10
|
||||||
|
@data.skip
|
||||||
|
#Step 11
|
||||||
|
if ["'", '"'].include?(@data.current_byte)
|
||||||
|
#11.1
|
||||||
|
quote_char = @data.current_byte
|
||||||
while true
|
while true
|
||||||
@data.position +=1
|
@data.position+=1
|
||||||
if (SPACE_CHARACTERS + ['>', '<']).include?(@data.currentByte)
|
#11.3
|
||||||
return [attrName.join(''), attrValue.join('')]
|
if @data.current_byte == quote_char
|
||||||
elsif ASCII_UPPERCASE.include?(@data.currentByte)
|
@data.position += 1
|
||||||
attrValue.push(@data.currentByte.downcase)
|
return [attr_name.join(''), attr_value.join('')]
|
||||||
else
|
#11.4
|
||||||
attrValue.push(@data.currentByte)
|
elsif ASCII_UPPERCASE.include?(@data.current_byte)
|
||||||
end
|
attr_value.push(@data.current_byte.downcase)
|
||||||
|
#11.5
|
||||||
|
else
|
||||||
|
attr_value.push(@data.current_byte)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
elsif ['>', '<'].include?(@data.current_byte)
|
||||||
|
return [attr_name.join(''), '']
|
||||||
|
elsif ASCII_UPPERCASE.include?(@data.current_byte)
|
||||||
|
attr_value.push(@data.current_byte.downcase)
|
||||||
|
else
|
||||||
|
attr_value.push(@data.current_byte)
|
||||||
|
end
|
||||||
|
while true
|
||||||
|
@data.position += 1
|
||||||
|
if (SPACE_CHARACTERS + ['>', '<']).include?(@data.current_byte)
|
||||||
|
return [attr_name.join(''), attr_value.join('')]
|
||||||
|
elsif ASCII_UPPERCASE.include?(@data.current_byte)
|
||||||
|
attr_value.push(@data.current_byte.downcase)
|
||||||
|
else
|
||||||
|
attr_value.push(@data.current_byte)
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class ContentAttrParser
|
class ContentAttrParser
|
||||||
def initialize(data)
|
def initialize(data)
|
||||||
@data = data
|
@data = data
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse
|
def parse
|
||||||
begin
|
begin
|
||||||
#Skip to the first ";"
|
#Skip to the first ";"
|
||||||
@data.position = 0
|
@data.position = 0
|
||||||
@data.jumpTo(';')
|
@data.jump_to(';')
|
||||||
@data.position += 1
|
@data.position += 1
|
||||||
@data.skip
|
@data.skip
|
||||||
#Check if the attr name is charset
|
#Check if the attr name is charset
|
||||||
#otherwise return
|
#otherwise return
|
||||||
@data.jumpTo('charset')
|
@data.jump_to('charset')
|
||||||
@data.position += 1
|
@data.position += 1
|
||||||
@data.skip
|
@data.skip
|
||||||
unless @data.currentByte == '='
|
unless @data.current_byte == '='
|
||||||
#If there is no = sign keep looking for attrs
|
#If there is no = sign keep looking for attrs
|
||||||
return nil
|
return nil
|
||||||
end
|
|
||||||
@data.position += 1
|
|
||||||
@data.skip
|
|
||||||
#Look for an encoding between matching quote marks
|
|
||||||
if ['"', "'"].include?(@data.currentByte)
|
|
||||||
quoteMark = @data.currentByte
|
|
||||||
@data.position += 1
|
|
||||||
oldPosition = @data.position
|
|
||||||
@data.jumpTo(quoteMark)
|
|
||||||
return @data[oldPosition ... @data.position]
|
|
||||||
else
|
|
||||||
#Unquoted value
|
|
||||||
oldPosition = @data.position
|
|
||||||
begin
|
|
||||||
@data.findNext(SPACE_CHARACTERS)
|
|
||||||
return @data[oldPosition ... @data.position]
|
|
||||||
rescue EOF
|
|
||||||
#Return the whole remaining value
|
|
||||||
return @data[oldPosition .. -1]
|
|
||||||
end
|
|
||||||
end
|
|
||||||
rescue EOF
|
|
||||||
return nil
|
|
||||||
end
|
end
|
||||||
|
@data.position += 1
|
||||||
|
@data.skip
|
||||||
|
#Look for an encoding between matching quote marks
|
||||||
|
if ['"', "'"].include?(@data.current_byte)
|
||||||
|
quote_mark = @data.current_byte
|
||||||
|
@data.position += 1
|
||||||
|
old_position = @data.position
|
||||||
|
@data.jump_to(quote_mark)
|
||||||
|
return @data[old_position ... @data.position]
|
||||||
|
else
|
||||||
|
#Unquoted value
|
||||||
|
old_position = @data.position
|
||||||
|
begin
|
||||||
|
@data.find_next(SPACE_CHARACTERS)
|
||||||
|
return @data[old_position ... @data.position]
|
||||||
|
rescue EOF
|
||||||
|
#Return the whole remaining value
|
||||||
|
return @data[old_position .. -1]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
rescue EOF
|
||||||
|
return nil
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Determine if a string is a supported encoding
|
# Determine if a string is a supported encoding
|
||||||
def self.isValidEncoding(encoding)
|
def self.is_valid_encoding(encoding)
|
||||||
(not encoding.nil? and encoding.kind_of?(String) and ENCODINGS.include?(encoding.downcase.strip))
|
(not encoding.nil? and encoding.kind_of?(String) and ENCODINGS.include?(encoding.downcase.strip))
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -16,126 +16,126 @@ require 'html5lib/constants'
|
||||||
|
|
||||||
module HTML5lib
|
module HTML5lib
|
||||||
|
|
||||||
# liberal XML parser
|
# liberal XML parser
|
||||||
class XMLParser < HTMLParser
|
class XMLParser < HTMLParser
|
||||||
|
|
||||||
def initialize(options={})
|
def initialize(options = {})
|
||||||
super options
|
super options
|
||||||
@phases[:initial] = XmlRootPhase.new(self, @tree)
|
@phases[:initial] = XmlRootPhase.new(self, @tree)
|
||||||
end
|
end
|
||||||
|
|
||||||
def normalizeToken(token)
|
def normalizeToken(token)
|
||||||
if token[:type] == :StartTag or token[:type] == :EmptyTag
|
if token[:type] == :StartTag or token[:type] == :EmptyTag
|
||||||
# We need to remove the duplicate attributes and convert attributes
|
# We need to remove the duplicate attributes and convert attributes
|
||||||
# to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
|
# to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
|
||||||
|
|
||||||
token[:data] = Hash[*token[:data].reverse.flatten]
|
token[:data] = Hash[*token[:data].reverse.flatten]
|
||||||
|
|
||||||
# For EmptyTags, process both a Start and an End tag
|
# For EmptyTags, process both a Start and an End tag
|
||||||
if token[:type] == :EmptyTag
|
if token[:type] == :EmptyTag
|
||||||
@phase.processStartTag(token[:name], token[:data])
|
@phase.processStartTag(token[:name], token[:data])
|
||||||
token[:data] = {}
|
token[:data] = {}
|
||||||
token[:type] = :EndTag
|
token[:type] = :EndTag
|
||||||
end
|
|
||||||
|
|
||||||
elsif token[:type] == :EndTag
|
|
||||||
if token[:data]
|
|
||||||
parseError(_("End tag contains unexpected attributes."))
|
|
||||||
end
|
|
||||||
|
|
||||||
elsif token[:type] == :Comment
|
|
||||||
# Rescue CDATA from the comments
|
|
||||||
if token[:data][0..6] == "[CDATA[" and token[:data][-2..-1] == "]]"
|
|
||||||
token[:type] = :Characters
|
|
||||||
token[:data] = token[:data][7 ... -2]
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
return token
|
elsif token[:type] == :EndTag
|
||||||
|
if token[:data]
|
||||||
|
parseError(_("End tag contains unexpected attributes."))
|
||||||
|
end
|
||||||
|
|
||||||
|
elsif token[:type] == :Comment
|
||||||
|
# Rescue CDATA from the comments
|
||||||
|
if token[:data][0..6] == "[CDATA[" and token[:data][-2..-1] == "]]"
|
||||||
|
token[:type] = :Characters
|
||||||
|
token[:data] = token[:data][7 ... -2]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
return token
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# liberal XMTHML parser
|
# liberal XMTHML parser
|
||||||
class XHTMLParser < XMLParser
|
class XHTMLParser < XMLParser
|
||||||
|
|
||||||
def initialize(options={})
|
def initialize(options = {})
|
||||||
super options
|
super options
|
||||||
@phases[:initial] = InitialPhase.new(self, @tree)
|
@phases[:initial] = InitialPhase.new(self, @tree)
|
||||||
@phases[:rootElement] = XhmlRootPhase.new(self, @tree)
|
@phases[:rootElement] = XhmlRootPhase.new(self, @tree)
|
||||||
end
|
end
|
||||||
|
|
||||||
def normalizeToken(token)
|
def normalizeToken(token)
|
||||||
super(token)
|
super(token)
|
||||||
|
|
||||||
# ensure that non-void XHTML elements have content so that separate
|
# ensure that non-void XHTML elements have content so that separate
|
||||||
# open and close tags are emitted
|
# open and close tags are emitted
|
||||||
if token[:type] == :EndTag and \
|
if token[:type] == :EndTag and \
|
||||||
not VOID_ELEMENTS.include? token[:name] and \
|
not VOID_ELEMENTS.include? token[:name] and \
|
||||||
token[:name] == @tree.openElements[-1].name and \
|
token[:name] == @tree.openElements[-1].name and \
|
||||||
not @tree.openElements[-1].hasContent
|
not @tree.openElements[-1].hasContent
|
||||||
@tree.insertText('') unless
|
@tree.insertText('') unless
|
||||||
@tree.openElements.any? {|e|
|
@tree.openElements.any? {|e|
|
||||||
e.attributes.keys.include? 'xmlns' and
|
e.attributes.keys.include? 'xmlns' and
|
||||||
e.attributes['xmlns'] != 'http://www.w3.org/1999/xhtml'
|
e.attributes['xmlns'] != 'http://www.w3.org/1999/xhtml'
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
return token
|
return token
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class XhmlRootPhase < RootElementPhase
|
class XhmlRootPhase < RootElementPhase
|
||||||
def insertHtmlElement
|
def insertHtmlElement
|
||||||
element = @tree.createElement("html", {'xmlns' => 'http://www.w3.org/1999/xhtml'})
|
element = @tree.createElement("html", {'xmlns' => 'http://www.w3.org/1999/xhtml'})
|
||||||
@tree.openElements.push(element)
|
@tree.openElements.push(element)
|
||||||
@tree.document.appendChild(element)
|
@tree.document.appendChild(element)
|
||||||
@parser.phase = @parser.phases[:beforeHead]
|
@parser.phase = @parser.phases[:beforeHead]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class XmlRootPhase < Phase
|
class XmlRootPhase < Phase
|
||||||
# Prime the Xml parser
|
# Prime the Xml parser
|
||||||
@start_tag_handlers = Hash.new(:startTagOther)
|
@start_tag_handlers = Hash.new(:startTagOther)
|
||||||
@end_tag_handlers = Hash.new(:endTagOther)
|
@end_tag_handlers = Hash.new(:endTagOther)
|
||||||
def startTagOther(name, attributes)
|
def startTagOther(name, attributes)
|
||||||
@tree.openElements.push(@tree.document)
|
@tree.openElements.push(@tree.document)
|
||||||
element = @tree.createElement(name, attributes)
|
element = @tree.createElement(name, attributes)
|
||||||
@tree.openElements[-1].appendChild(element)
|
@tree.openElements[-1].appendChild(element)
|
||||||
@tree.openElements.push(element)
|
@tree.openElements.push(element)
|
||||||
@parser.phase = XmlElementPhase.new(@parser,@tree)
|
@parser.phase = XmlElementPhase.new(@parser,@tree)
|
||||||
end
|
end
|
||||||
def endTagOther(name)
|
def endTagOther(name)
|
||||||
super
|
super
|
||||||
@tree.openElements.pop
|
@tree.openElements.pop
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class XmlElementPhase < Phase
|
class XmlElementPhase < Phase
|
||||||
# Generic handling for all XML elements
|
# Generic handling for all XML elements
|
||||||
|
|
||||||
@start_tag_handlers = Hash.new(:startTagOther)
|
@start_tag_handlers = Hash.new(:startTagOther)
|
||||||
@end_tag_handlers = Hash.new(:endTagOther)
|
@end_tag_handlers = Hash.new(:endTagOther)
|
||||||
|
|
||||||
def startTagOther(name, attributes)
|
def startTagOther(name, attributes)
|
||||||
element = @tree.createElement(name, attributes)
|
element = @tree.createElement(name, attributes)
|
||||||
@tree.openElements[-1].appendChild(element)
|
@tree.openElements[-1].appendChild(element)
|
||||||
@tree.openElements.push(element)
|
@tree.openElements.push(element)
|
||||||
end
|
end
|
||||||
|
|
||||||
def endTagOther(name)
|
def endTagOther(name)
|
||||||
for node in @tree.openElements.reverse
|
for node in @tree.openElements.reverse
|
||||||
if node.name == name
|
if node.name == name
|
||||||
{} while @tree.openElements.pop != node
|
{} while @tree.openElements.pop != node
|
||||||
break
|
break
|
||||||
else
|
else
|
||||||
@parser.parseError
|
@parser.parseError
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def processCharacters(data)
|
def processCharacters(data)
|
||||||
@tree.insertText(data)
|
@tree.insertText(data)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
247
vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb
vendored
247
vendor/plugins/HTML5lib/lib/html5lib/sanitizer.rb
vendored
|
@ -6,87 +6,87 @@ module HTML5lib
|
||||||
# This module provides sanitization of XHTML+MathML+SVG
|
# This module provides sanitization of XHTML+MathML+SVG
|
||||||
# and of inline style attributes.
|
# and of inline style attributes.
|
||||||
|
|
||||||
class HTMLSanitizer < HTMLTokenizer
|
class HTMLSanitizer < HTMLTokenizer
|
||||||
|
|
||||||
ACCEPTABLE_ELEMENTS = %w[a abbr acronym address area b big blockquote br
|
ACCEPTABLE_ELEMENTS = %w[a abbr acronym address area b big blockquote br
|
||||||
button caption center cite code col colgroup dd del dfn dir div dl dt
|
button caption center cite code col colgroup dd del dfn dir div dl dt
|
||||||
em fieldset font form h1 h2 h3 h4 h5 h6 hr i img input ins kbd label
|
em fieldset font form h1 h2 h3 h4 h5 h6 hr i img input ins kbd label
|
||||||
legend li map menu ol optgroup option p pre q s samp select small span
|
legend li map menu ol optgroup option p pre q s samp select small span
|
||||||
strike strong sub sup table tbody td textarea tfoot th thead tr tt u
|
strike strong sub sup table tbody td textarea tfoot th thead tr tt u
|
||||||
ul var]
|
ul var]
|
||||||
|
|
||||||
MATHML_ELEMENTS = %w[maction math merror mfrac mi mmultiscripts mn mo
|
MATHML_ELEMENTS = %w[maction math merror mfrac mi mmultiscripts mn mo
|
||||||
mover mpadded mphantom mprescripts mroot mrow mspace msqrt mstyle msub
|
mover mpadded mphantom mprescripts mroot mrow mspace msqrt mstyle msub
|
||||||
msubsup msup mtable mtd mtext mtr munder munderover none]
|
msubsup msup mtable mtd mtext mtr munder munderover none]
|
||||||
|
|
||||||
SVG_ELEMENTS = %w[a animate animateColor animateMotion animateTransform
|
SVG_ELEMENTS = %w[a animate animateColor animateMotion animateTransform
|
||||||
circle defs desc ellipse font-face font-face-name font-face-src g
|
circle defs desc ellipse font-face font-face-name font-face-src g
|
||||||
glyph hkern image linearGradient line marker metadata missing-glyph
|
glyph hkern image linearGradient line marker metadata missing-glyph
|
||||||
mpath path polygon polyline radialGradient rect set stop svg switch
|
mpath path polygon polyline radialGradient rect set stop svg switch
|
||||||
text title tspan use]
|
text title tspan use]
|
||||||
|
|
||||||
ACCEPTABLE_ATTRIBUTES = %w[abbr accept accept-charset accesskey action
|
ACCEPTABLE_ATTRIBUTES = %w[abbr accept accept-charset accesskey action
|
||||||
align alt axis border cellpadding cellspacing char charoff charset
|
align alt axis border cellpadding cellspacing char charoff charset
|
||||||
checked cite class clear cols colspan color compact coords datetime
|
checked cite class clear cols colspan color compact coords datetime
|
||||||
dir disabled enctype for frame headers height href hreflang hspace id
|
dir disabled enctype for frame headers height href hreflang hspace id
|
||||||
ismap label lang longdesc maxlength media method multiple name nohref
|
ismap label lang longdesc maxlength media method multiple name nohref
|
||||||
noshade nowrap prompt readonly rel rev rows rowspan rules scope
|
noshade nowrap prompt readonly rel rev rows rowspan rules scope
|
||||||
selected shape size span src start style summary tabindex target title
|
selected shape size span src start style summary tabindex target title
|
||||||
type usemap valign value vspace width xml:lang]
|
type usemap valign value vspace width xml:lang]
|
||||||
|
|
||||||
MATHML_ATTRIBUTES = %w[actiontype align columnalign columnalign
|
MATHML_ATTRIBUTES = %w[actiontype align columnalign columnalign
|
||||||
columnalign columnlines columnspacing columnspan depth display
|
columnalign columnlines columnspacing columnspan depth display
|
||||||
displaystyle equalcolumns equalrows fence fontstyle fontweight frame
|
displaystyle equalcolumns equalrows fence fontstyle fontweight frame
|
||||||
height linethickness lspace mathbackground mathcolor mathvariant
|
height linethickness lspace mathbackground mathcolor mathvariant
|
||||||
mathvariant maxsize minsize other rowalign rowalign rowalign rowlines
|
mathvariant maxsize minsize other rowalign rowalign rowalign rowlines
|
||||||
rowspacing rowspan rspace scriptlevel selection separator stretchy
|
rowspacing rowspan rspace scriptlevel selection separator stretchy
|
||||||
width width xlink:href xlink:show xlink:type xmlns xmlns:xlink]
|
width width xlink:href xlink:show xlink:type xmlns xmlns:xlink]
|
||||||
|
|
||||||
SVG_ATTRIBUTES = %w[accent-height accumulate additive alphabetic
|
SVG_ATTRIBUTES = %w[accent-height accumulate additive alphabetic
|
||||||
arabic-form ascent attributeName attributeType baseProfile bbox begin
|
arabic-form ascent attributeName attributeType baseProfile bbox begin
|
||||||
by calcMode cap-height class color color-rendering content cx cy d dx
|
by calcMode cap-height class color color-rendering content cx cy d dx
|
||||||
dy descent display dur end fill fill-rule font-family font-size
|
dy descent display dur end fill fill-rule font-family font-size
|
||||||
font-stretch font-style font-variant font-weight from fx fy g1 g2
|
font-stretch font-style font-variant font-weight from fx fy g1 g2
|
||||||
glyph-name gradientUnits hanging height horiz-adv-x horiz-origin-x id
|
glyph-name gradientUnits hanging height horiz-adv-x horiz-origin-x id
|
||||||
ideographic k keyPoints keySplines keyTimes lang marker-end
|
ideographic k keyPoints keySplines keyTimes lang marker-end
|
||||||
marker-mid marker-start markerHeight markerUnits markerWidth
|
marker-mid marker-start markerHeight markerUnits markerWidth
|
||||||
mathematical max min name offset opacity orient origin
|
mathematical max min name offset opacity orient origin
|
||||||
overline-position overline-thickness panose-1 path pathLength points
|
overline-position overline-thickness panose-1 path pathLength points
|
||||||
preserveAspectRatio r refX refY repeatCount repeatDur
|
preserveAspectRatio r refX refY repeatCount repeatDur
|
||||||
requiredExtensions requiredFeatures restart rotate rx ry slope stemh
|
requiredExtensions requiredFeatures restart rotate rx ry slope stemh
|
||||||
stemv stop-color stop-opacity strikethrough-position
|
stemv stop-color stop-opacity strikethrough-position
|
||||||
strikethrough-thickness stroke stroke-dasharray stroke-dashoffset
|
strikethrough-thickness stroke stroke-dasharray stroke-dashoffset
|
||||||
stroke-linecap stroke-linejoin stroke-miterlimit stroke-opacity
|
stroke-linecap stroke-linejoin stroke-miterlimit stroke-opacity
|
||||||
stroke-width systemLanguage target text-anchor to transform type u1
|
stroke-width systemLanguage target text-anchor to transform type u1
|
||||||
u2 underline-position underline-thickness unicode unicode-range
|
u2 underline-position underline-thickness unicode unicode-range
|
||||||
units-per-em values version viewBox visibility width widths x
|
units-per-em values version viewBox visibility width widths x
|
||||||
x-height x1 x2 xlink:actuate xlink:arcrole xlink:href xlink:role
|
x-height x1 x2 xlink:actuate xlink:arcrole xlink:href xlink:role
|
||||||
xlink:show xlink:title xlink:type xml:base xml:lang xml:space xmlns
|
xlink:show xlink:title xlink:type xml:base xml:lang xml:space xmlns
|
||||||
xmlns:xlink y y1 y2 zoomAndPan]
|
xmlns:xlink y y1 y2 zoomAndPan]
|
||||||
|
|
||||||
ATTR_VAL_IS_URI = %w[href src cite action longdesc xlink:href]
|
ATTR_VAL_IS_URI = %w[href src cite action longdesc xlink:href]
|
||||||
|
|
||||||
ACCEPTABLE_CSS_PROPERTIES = %w[azimuth background-color
|
ACCEPTABLE_CSS_PROPERTIES = %w[azimuth background-color
|
||||||
border-bottom-color border-collapse border-color border-left-color
|
border-bottom-color border-collapse border-color border-left-color
|
||||||
border-right-color border-top-color clear color cursor direction
|
border-right-color border-top-color clear color cursor direction
|
||||||
display elevation float font font-family font-size font-style
|
display elevation float font font-family font-size font-style
|
||||||
font-variant font-weight height letter-spacing line-height overflow
|
font-variant font-weight height letter-spacing line-height overflow
|
||||||
pause pause-after pause-before pitch pitch-range richness speak
|
pause pause-after pause-before pitch pitch-range richness speak
|
||||||
speak-header speak-numeral speak-punctuation speech-rate stress
|
speak-header speak-numeral speak-punctuation speech-rate stress
|
||||||
text-align text-decoration text-indent unicode-bidi vertical-align
|
text-align text-decoration text-indent unicode-bidi vertical-align
|
||||||
voice-family volume white-space width]
|
voice-family volume white-space width]
|
||||||
|
|
||||||
ACCEPTABLE_CSS_KEYWORDS = %w[auto aqua black block blue bold both bottom
|
ACCEPTABLE_CSS_KEYWORDS = %w[auto aqua black block blue bold both bottom
|
||||||
brown center collapse dashed dotted fuchsia gray green !important
|
brown center collapse dashed dotted fuchsia gray green !important
|
||||||
italic left lime maroon medium none navy normal nowrap olive pointer
|
italic left lime maroon medium none navy normal nowrap olive pointer
|
||||||
purple red right solid silver teal top transparent underline white
|
purple red right solid silver teal top transparent underline white
|
||||||
yellow]
|
yellow]
|
||||||
|
|
||||||
ACCEPTABLE_SVG_PROPERTIES = %w[fill fill-opacity fill-rule stroke
|
ACCEPTABLE_SVG_PROPERTIES = %w[fill fill-opacity fill-rule stroke
|
||||||
stroke-width stroke-linecap stroke-linejoin stroke-opacity]
|
stroke-width stroke-linecap stroke-linejoin stroke-opacity]
|
||||||
|
|
||||||
ACCEPTABLE_PROTOCOLS = %w[ed2k ftp http https irc mailto news gopher nntp
|
ACCEPTABLE_PROTOCOLS = %w[ed2k ftp http https irc mailto news gopher nntp
|
||||||
telnet webcal xmpp callto feed urn aim rsync tag ssh sftp rtsp afs]
|
telnet webcal xmpp callto feed urn aim rsync tag ssh sftp rtsp afs]
|
||||||
|
|
||||||
# subclasses may define their own versions of these constants
|
# subclasses may define their own versions of these constants
|
||||||
ALLOWED_ELEMENTS = ACCEPTABLE_ELEMENTS + MATHML_ELEMENTS + SVG_ELEMENTS
|
ALLOWED_ELEMENTS = ACCEPTABLE_ELEMENTS + MATHML_ELEMENTS + SVG_ELEMENTS
|
||||||
|
@ -104,75 +104,74 @@ class HTMLSanitizer < HTMLTokenizer
|
||||||
# in ALLOWED_PROTOCOLS are allowed.
|
# in ALLOWED_PROTOCOLS are allowed.
|
||||||
#
|
#
|
||||||
# sanitize_html('<script> do_nasty_stuff() </script>')
|
# sanitize_html('<script> do_nasty_stuff() </script>')
|
||||||
# => <script> do_nasty_stuff() </script>
|
# => <script> do_nasty_stuff() </script>
|
||||||
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
|
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
|
||||||
# => <a>Click here for $100</a>
|
# => <a>Click here for $100</a>
|
||||||
def each
|
def each
|
||||||
super do |token|
|
super do |token|
|
||||||
case token[:type]
|
case token[:type]
|
||||||
when :StartTag, :EndTag, :EmptyTag
|
when :StartTag, :EndTag, :EmptyTag
|
||||||
if ALLOWED_ELEMENTS.include?(token[:name])
|
if ALLOWED_ELEMENTS.include?(token[:name])
|
||||||
if token.has_key? :data
|
if token.has_key? :data
|
||||||
attrs = Hash[*token[:data].flatten]
|
attrs = Hash[*token[:data].flatten]
|
||||||
attrs.delete_if { |attr,v| !ALLOWED_ATTRIBUTES.include?(attr) }
|
attrs.delete_if { |attr,v| !ALLOWED_ATTRIBUTES.include?(attr) }
|
||||||
ATTR_VAL_IS_URI.each do |attr|
|
ATTR_VAL_IS_URI.each do |attr|
|
||||||
val_unescaped = CGI.unescapeHTML(attrs[attr].to_s).gsub(/[\000-\040\177\s]+|\302[\200-\240]/,'').downcase
|
val_unescaped = CGI.unescapeHTML(attrs[attr].to_s).gsub(/[\000-\040\177\s]+|\302[\200-\240]/,'').downcase
|
||||||
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ and !ALLOWED_PROTOCOLS.include?(val_unescaped.split(':')[0])
|
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ and !ALLOWED_PROTOCOLS.include?(val_unescaped.split(':')[0])
|
||||||
attrs.delete attr
|
attrs.delete attr
|
||||||
end
|
|
||||||
end
|
|
||||||
if attrs['style']
|
|
||||||
attrs['style'] = sanitize_css(attrs['style'])
|
|
||||||
end
|
|
||||||
token[:data] = attrs.map {|k,v| [k,v]}
|
|
||||||
end
|
|
||||||
yield token
|
|
||||||
else
|
|
||||||
if token[:type] == :EndTag
|
|
||||||
token[:data] = "</#{token[:name]}>"
|
|
||||||
elsif token[:data]
|
|
||||||
attrs = token[:data].map {|k,v| " #{k}=\"#{CGI.escapeHTML(v)}\""}.join('')
|
|
||||||
token[:data] = "<#{token[:name]}#{attrs}>"
|
|
||||||
else
|
|
||||||
token[:data] = "<#{token[:name]}>"
|
|
||||||
end
|
|
||||||
token[:data].insert(-2,'/') if token[:type] == :EmptyTag
|
|
||||||
token[:type] = :Characters
|
|
||||||
token.delete(:name)
|
|
||||||
yield token
|
|
||||||
end
|
end
|
||||||
else
|
|
||||||
yield token
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def sanitize_css(style)
|
|
||||||
# disallow urls
|
|
||||||
style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')
|
|
||||||
|
|
||||||
# gauntlet
|
|
||||||
return '' unless style =~ /^([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*$/
|
|
||||||
return '' unless style =~ /^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$/
|
|
||||||
|
|
||||||
clean = []
|
|
||||||
style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop,val|
|
|
||||||
next if val.empty?
|
|
||||||
prop.downcase!
|
|
||||||
if ALLOWED_CSS_PROPERTIES.include?(prop)
|
|
||||||
clean << "#{prop}: #{val};"
|
|
||||||
elsif %w[background border margin padding].include?(prop.split('-')[0])
|
|
||||||
clean << "#{prop}: #{val};" unless val.split().any? do |keyword|
|
|
||||||
!ALLOWED_CSS_KEYWORDS.include?(keyword) and
|
|
||||||
keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/
|
|
||||||
end
|
|
||||||
elsif ALLOWED_SVG_PROPERTIES.include?(prop)
|
|
||||||
clean << "#{prop}: #{val};"
|
|
||||||
end
|
end
|
||||||
|
if attrs['style']
|
||||||
|
attrs['style'] = sanitize_css(attrs['style'])
|
||||||
|
end
|
||||||
|
token[:data] = attrs.map {|k,v| [k,v]}
|
||||||
|
end
|
||||||
|
yield token
|
||||||
|
else
|
||||||
|
if token[:type] == :EndTag
|
||||||
|
token[:data] = "</#{token[:name]}>"
|
||||||
|
elsif token[:data]
|
||||||
|
attrs = token[:data].map {|k,v| " #{k}=\"#{CGI.escapeHTML(v)}\""}.join('')
|
||||||
|
token[:data] = "<#{token[:name]}#{attrs}>"
|
||||||
|
else
|
||||||
|
token[:data] = "<#{token[:name]}>"
|
||||||
|
end
|
||||||
|
token[:data].insert(-2,'/') if token[:type] == :EmptyTag
|
||||||
|
token[:type] = :Characters
|
||||||
|
token.delete(:name)
|
||||||
|
yield token
|
||||||
end
|
end
|
||||||
|
else
|
||||||
style = clean.join(' ')
|
yield token
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def sanitize_css(style)
|
||||||
|
# disallow urls
|
||||||
|
style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')
|
||||||
|
|
||||||
|
# gauntlet
|
||||||
|
return '' unless style =~ /^([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*$/
|
||||||
|
return '' unless style =~ /^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$/
|
||||||
|
|
||||||
|
clean = []
|
||||||
|
style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop, val|
|
||||||
|
next if val.empty?
|
||||||
|
prop.downcase!
|
||||||
|
if ALLOWED_CSS_PROPERTIES.include?(prop)
|
||||||
|
clean << "#{prop}: #{val};"
|
||||||
|
elsif %w[background border margin padding].include?(prop.split('-')[0])
|
||||||
|
clean << "#{prop}: #{val};" unless val.split().any? do |keyword|
|
||||||
|
!ALLOWED_CSS_KEYWORDS.include?(keyword) and
|
||||||
|
keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/
|
||||||
|
end
|
||||||
|
elsif ALLOWED_SVG_PROPERTIES.include?(prop)
|
||||||
|
clean << "#{prop}: #{val};"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
style = clean.join(' ')
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
1392
vendor/plugins/HTML5lib/lib/html5lib/tokenizer.rb
vendored
1392
vendor/plugins/HTML5lib/lib/html5lib/tokenizer.rb
vendored
File diff suppressed because it is too large
Load diff
|
@ -1,21 +1,21 @@
|
||||||
module HTML5lib
|
module HTML5lib
|
||||||
module TreeBuilders
|
module TreeBuilders
|
||||||
|
|
||||||
def self.getTreeBuilder(name)
|
def self.getTreeBuilder(name)
|
||||||
case name.to_s.downcase
|
case name.to_s.downcase
|
||||||
when 'simpletree' then
|
when 'simpletree' then
|
||||||
require 'html5lib/treebuilders/simpletree'
|
require 'html5lib/treebuilders/simpletree'
|
||||||
SimpleTree::TreeBuilder
|
SimpleTree::TreeBuilder
|
||||||
when 'rexml' then
|
when 'rexml' then
|
||||||
require 'html5lib/treebuilders/rexml'
|
require 'html5lib/treebuilders/rexml'
|
||||||
REXMLTree::TreeBuilder
|
REXMLTree::TreeBuilder
|
||||||
when 'hpricot' then
|
when 'hpricot' then
|
||||||
require 'html5lib/treebuilders/hpricot'
|
require 'html5lib/treebuilders/hpricot'
|
||||||
Hpricot::TreeBuilder
|
Hpricot::TreeBuilder
|
||||||
else
|
else
|
||||||
raise "Unknown TreeBuilder #{name}"
|
raise "Unknown TreeBuilder #{name}"
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -4,166 +4,166 @@ require 'html5lib/constants'
|
||||||
|
|
||||||
module HTML5lib
|
module HTML5lib
|
||||||
|
|
||||||
# The scope markers are inserted when entering buttons, object elements,
|
# The scope markers are inserted when entering buttons, object elements,
|
||||||
# marquees, table cells, and table captions, and are used to prevent formatting
|
# marquees, table cells, and table captions, and are used to prevent formatting
|
||||||
# from "leaking" into tables, buttons, object elements, and marquees.
|
# from "leaking" into tables, buttons, object elements, and marquees.
|
||||||
Marker = nil
|
Marker = nil
|
||||||
|
|
||||||
module TreeBuilders
|
module TreeBuilders
|
||||||
module Base
|
module Base
|
||||||
|
|
||||||
class Node
|
class Node
|
||||||
# The parent of the current node (or nil for the document node)
|
# The parent of the current node (or nil for the document node)
|
||||||
attr_accessor :parent
|
attr_accessor :parent
|
||||||
|
|
||||||
# a list of child nodes of the current node. This must
|
# a list of child nodes of the current node. This must
|
||||||
# include all elements but not necessarily other node types
|
# include all elements but not necessarily other node types
|
||||||
attr_accessor :childNodes
|
attr_accessor :childNodes
|
||||||
|
|
||||||
# A list of miscellaneous flags that can be set on the node
|
# A list of miscellaneous flags that can be set on the node
|
||||||
attr_accessor :_flags
|
attr_accessor :_flags
|
||||||
|
|
||||||
def initialize(name)
|
def initialize(name)
|
||||||
@parent = nil
|
@parent = nil
|
||||||
@childNodes = []
|
@childNodes = []
|
||||||
@_flags = []
|
@_flags = []
|
||||||
end
|
|
||||||
|
|
||||||
# Insert node as a child of the current node
|
|
||||||
def appendChild(node)
|
|
||||||
raise NotImplementedError
|
|
||||||
end
|
|
||||||
|
|
||||||
# Insert data as text in the current node, positioned before the
|
|
||||||
# start of node insertBefore or to the end of the node's text.
|
|
||||||
def insertText(data, insertBefore = nil)
|
|
||||||
raise NotImplementedError
|
|
||||||
end
|
|
||||||
|
|
||||||
# Insert node as a child of the current node, before refNode in the
|
|
||||||
# list of child nodes. Raises ValueError if refNode is not a child of
|
|
||||||
# the current node
|
|
||||||
def insertBefore(node, refNode)
|
|
||||||
raise NotImplementedError
|
|
||||||
end
|
|
||||||
|
|
||||||
# Remove node from the children of the current node
|
|
||||||
def removeChild(node)
|
|
||||||
raise NotImplementedError
|
|
||||||
end
|
|
||||||
|
|
||||||
# Move all the children of the current node to newParent.
|
|
||||||
# This is needed so that trees that don't store text as nodes move the
|
|
||||||
# text in the correct way
|
|
||||||
def reparentChildren(newParent)
|
|
||||||
#XXX - should this method be made more general?
|
|
||||||
@childNodes.each { |child| newParent.appendChild(child) }
|
|
||||||
@childNodes = []
|
|
||||||
end
|
|
||||||
|
|
||||||
# Return a shallow copy of the current node i.e. a node with the same
|
|
||||||
# name and attributes but with no parent or child nodes
|
|
||||||
def cloneNode
|
|
||||||
raise NotImplementedError
|
|
||||||
end
|
|
||||||
|
|
||||||
# Return true if the node has children or text, false otherwise
|
|
||||||
def hasContent
|
|
||||||
raise NotImplementedError
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# Base treebuilder implementation
|
|
||||||
class TreeBuilder
|
|
||||||
|
|
||||||
attr_accessor :openElements
|
|
||||||
|
|
||||||
attr_accessor :activeFormattingElements
|
|
||||||
|
|
||||||
attr_accessor :document
|
|
||||||
|
|
||||||
attr_accessor :headPointer
|
|
||||||
|
|
||||||
attr_accessor :formPointer
|
|
||||||
|
|
||||||
# Class to use for document root
|
|
||||||
documentClass = nil
|
|
||||||
|
|
||||||
# Class to use for HTML elements
|
|
||||||
elementClass = nil
|
|
||||||
|
|
||||||
# Class to use for comments
|
|
||||||
commentClass = nil
|
|
||||||
|
|
||||||
# Class to use for doctypes
|
|
||||||
doctypeClass = nil
|
|
||||||
|
|
||||||
# Fragment class
|
|
||||||
fragmentClass = nil
|
|
||||||
|
|
||||||
# Build a tree builder in its pristine state by delegating to #reset.
def initialize
  reset
end
|
|
||||||
|
|
||||||
# Put the builder back into its initial state.
#
# Clears the stack of open elements and the list of active formatting
# elements, forgets the head and form pointers, switches element
# insertion back to the normal (non-table) mode through the
# insertFromTable= writer, and creates a new document from @documentClass.
#
# Returns the newly created document (the value of the last assignment).
def reset
  @openElements = []
  @activeFormattingElements = []

  # XXX - rename these to headElement, formElement
  @headPointer = nil
  @formPointer = nil

  self.insertFromTable = false

  @document = @documentClass.new
end
|
|
||||||
|
|
||||||
# Tell whether an element named target is "in scope" on the stack of
# open elements.
#
# target       - element name to look for.
# tableVariant - when true, the names in SCOPING_ELEMENTS do not end the
#                search; only 'table' and 'html' do.
#
# The stack is walked from the most recently opened element downwards.
# Returns true as soon as target is met, false as soon as a boundary
# element ('table', a scoping element unless tableVariant, or 'html') is
# met first.
def elementInScope(target, tableVariant = false)
  # Exit early when possible.
  return true if @openElements[-1].name == target

  # reverse_each walks the stack top-down without copying it.
  @openElements.reverse_each do |element|
    name = element.name
    return true if name == target
    return false if name == 'table'
    return false if !tableVariant && SCOPING_ELEMENTS.include?(name)
    return false if name == 'html'
  end

  # We should never reach this point.
  # NOTE(review): relies on an `assert` helper that is not defined in this
  # block; confirm one is available to the tree builder.
  assert false
end
|
|
||||||
|
|
||||||
def reconstructActiveFormattingElements
|
# Insert node as a child of the current node
|
||||||
# Within this algorithm the order of steps described in the
|
def appendChild(node)
|
||||||
# specification is not quite the same as the order of steps in the
|
raise NotImplementedError
|
||||||
# code. It should still do the same though.
|
end
|
||||||
|
|
||||||
# Step 1: stop the algorithm when there's nothing to do.
|
# Insert data as text in the current node, positioned before the
|
||||||
return unless @activeFormattingElements
|
# start of node insertBefore or to the end of the node's text.
|
||||||
|
def insertText(data, insertBefore=nil)
|
||||||
|
raise NotImplementedError
|
||||||
|
end
|
||||||
|
|
||||||
# Step 2 and step 3: we start with the last element. So i is -1.
|
# Insert node as a child of the current node, before refNode in the
|
||||||
i = -1
|
# list of child nodes. Raises ValueError if refNode is not a child of
|
||||||
entry = @activeFormattingElements[i]
|
# the current node
|
||||||
return if entry == Marker or @openElements.include?(entry)
|
def insertBefore(node, refNode)
|
||||||
|
raise NotImplementedError
|
||||||
|
end
|
||||||
|
|
||||||
# Step 6
|
# Remove node from the children of the current node
|
||||||
until entry == Marker or @openElements.include?(entry)
|
def removeChild(node)
|
||||||
|
raise NotImplementedError
|
||||||
|
end
|
||||||
|
|
||||||
|
# Move all the children of the current node to newParent.
|
||||||
|
# This is needed so that trees that don't store text as nodes move the
|
||||||
|
# text in the correct way
|
||||||
|
def reparentChildren(newParent)
|
||||||
|
#XXX - should this method be made more general?
|
||||||
|
@childNodes.each { |child| newParent.appendChild(child) }
|
||||||
|
@childNodes = []
|
||||||
|
end
|
||||||
|
|
||||||
|
# Return a shallow copy of the current node i.e. a node with the same
|
||||||
|
# name and attributes but with no parent or child nodes
|
||||||
|
def cloneNode
|
||||||
|
raise NotImplementedError
|
||||||
|
end
|
||||||
|
|
||||||
|
# Return true if the node has children or text, false otherwise
|
||||||
|
def hasContent
|
||||||
|
raise NotImplementedError
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Base treebuilder implementation
|
||||||
|
class TreeBuilder
|
||||||
|
|
||||||
|
attr_accessor :openElements
|
||||||
|
|
||||||
|
attr_accessor :activeFormattingElements
|
||||||
|
|
||||||
|
attr_accessor :document
|
||||||
|
|
||||||
|
attr_accessor :headPointer
|
||||||
|
|
||||||
|
attr_accessor :formPointer
|
||||||
|
|
||||||
|
# Class to use for document root
|
||||||
|
documentClass = nil
|
||||||
|
|
||||||
|
# Class to use for HTML elements
|
||||||
|
elementClass = nil
|
||||||
|
|
||||||
|
# Class to use for comments
|
||||||
|
commentClass = nil
|
||||||
|
|
||||||
|
# Class to use for doctypes
|
||||||
|
doctypeClass = nil
|
||||||
|
|
||||||
|
# Fragment class
|
||||||
|
fragmentClass = nil
|
||||||
|
|
||||||
|
def initialize
|
||||||
|
reset
|
||||||
|
end
|
||||||
|
|
||||||
|
def reset
|
||||||
|
@openElements = []
|
||||||
|
@activeFormattingElements = []
|
||||||
|
|
||||||
|
#XXX - rename these to headElement, formElement
|
||||||
|
@headPointer = nil
|
||||||
|
@formPointer = nil
|
||||||
|
|
||||||
|
self.insertFromTable = false
|
||||||
|
|
||||||
|
@document = @documentClass.new
|
||||||
|
end
|
||||||
|
|
||||||
|
def elementInScope(target, tableVariant=false)
|
||||||
|
# Exit early when possible.
|
||||||
|
return true if @openElements[-1].name == target
|
||||||
|
|
||||||
|
# AT How about while true and simply set node to [-1] and set it to
|
||||||
|
# [-2] at the end...
|
||||||
|
@openElements.reverse.each do |element|
|
||||||
|
if element.name == target
|
||||||
|
return true
|
||||||
|
elsif element.name == 'table'
|
||||||
|
return false
|
||||||
|
elsif not tableVariant and SCOPING_ELEMENTS.include?(element.name)
|
||||||
|
return false
|
||||||
|
elsif element.name == 'html'
|
||||||
|
return false
|
||||||
|
end
|
||||||
|
end
|
||||||
|
assert false # We should never reach this point
|
||||||
|
end
|
||||||
|
|
||||||
|
def reconstructActiveFormattingElements
|
||||||
|
# Within this algorithm the order of steps described in the
|
||||||
|
# specification is not quite the same as the order of steps in the
|
||||||
|
# code. It should still do the same though.
|
||||||
|
|
||||||
|
# Step 1: stop the algorithm when there's nothing to do.
|
||||||
|
return unless @activeFormattingElements
|
||||||
|
|
||||||
|
# Step 2 and step 3: we start with the last element. So i is -1.
|
||||||
|
i = -1
|
||||||
|
entry = @activeFormattingElements[i]
|
||||||
|
return if entry == Marker or @openElements.include?(entry)
|
||||||
|
|
||||||
|
# Step 6
|
||||||
|
until entry == Marker or @openElements.include?(entry)
|
||||||
# Step 5: let entry be one earlier in the list.
|
# Step 5: let entry be one earlier in the list.
|
||||||
i -= 1
|
i -= 1
|
||||||
begin
|
begin
|
||||||
entry = @activeFormattingElements[i]
|
entry = @activeFormattingElements[i]
|
||||||
rescue
|
rescue
|
||||||
# Step 4: at this point we need to jump to step 8. By not doing
|
# Step 4: at this point we need to jump to step 8. By not doing
|
||||||
# i += 1 which is also done in step 7 we achieve that.
|
# i += 1 which is also done in step 7 we achieve that.
|
||||||
break
|
break
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
while true
|
while true
|
||||||
# Step 7
|
# Step 7
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
|
@ -178,153 +178,153 @@ class TreeBuilder
|
||||||
|
|
||||||
# Step 11
|
# Step 11
|
||||||
break if element == @activeFormattingElements[-1]
|
break if element == @activeFormattingElements[-1]
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
# Pop entries off the list of active formatting elements until a Marker
# has been popped or the list is empty, whichever comes first.
#
# The Marker itself is removed as well. Returns nil.
def clearActiveFormattingElements
  until @activeFormattingElements.empty?
    break if @activeFormattingElements.pop == Marker
  end
end
|
||||||
|
|
||||||
# Check if an element exists between the end of the active formatting
# elements and the last marker. If it does, return it, else return false.
#
# name - element name to look for.
def elementInActiveFormattingElements(name)
  # reverse_each walks the list newest-first without copying it.
  @activeFormattingElements.reverse_each do |element|
    # Check for Marker first because if it's a Marker it doesn't have a
    # name attribute.
    break if element == Marker
    return element if element.name == name
  end
  false
end
|
|
||||||
|
|
||||||
# Create a doctype node for name with @doctypeClass and append it to the
# document. Returns whatever the document's appendChild returns.
def insertDoctype(name)
  @document.appendChild(@doctypeClass.new(name))
end
|
||||||
|
|
||||||
# Create a comment node holding data with @commentClass and append it to
# parent.
#
# data   - comment content passed to @commentClass.new.
# parent - node to append to; when nil the current node (the last entry
#          of @openElements) is used.
def insertComment(data, parent = nil)
  parent = @openElements[-1] if parent.nil?
  parent.appendChild(@commentClass.new(data))
end
|
||||||
|
|
||||||
# Create an element but don't insert it anywhere.
#
# name       - passed to @elementClass.new.
# attributes - assigned through the new element's attributes= writer.
#
# Returns the new element.
def createElement(name, attributes)
  element = @elementClass.new(name)
  element.attributes = attributes
  element
end
|
||||||
|
|
||||||
# Switch the function used to insert an element from the normal one to
# the misnested table one and back again.
#
# value - truthy selects :insertElementTable, falsy selects
#         :insertElementNormal as the method name stored in
#         @insertElement; the flag itself is kept in @insertFromTable.
def insertFromTable=(value)
  @insertFromTable = value
  @insertElement = value ? :insertElementTable : :insertElementNormal
end
|
||||||
|
|
||||||
# Insert an element using whichever strategy is currently selected.
#
# Dispatches to the method whose name is stored in @insertElement (set by
# insertFromTable=) and returns its result.
def insertElement(name, attributes)
  send(@insertElement, name, attributes)
end
|
||||||
|
|
||||||
# Create an element, append it to the current node (the last entry of
# @openElements) and push it onto the stack of open elements.
#
# name       - passed to @elementClass.new.
# attributes - assigned through the new element's attributes= writer.
#
# Returns the new element.
def insertElementNormal(name, attributes)
  element = @elementClass.new(name)
  element.attributes = attributes
  @openElements[-1].appendChild(element)
  @openElements.push(element)
  element
end
|
||||||
|
|
||||||
# Create an element and insert it into the tree, taking misnested table
# content into account.
#
# When the current node is not one of TABLE_INSERT_MODE_ELEMENTS this is
# exactly insertElementNormal. Otherwise the element is placed at the
# position reported by getTableMisnestedNodePosition: appended to the
# foster parent, or inserted before the reported sibling when there is
# one, and then pushed onto the stack of open elements.
#
# Returns the inserted element.
def insertElementTable(name, attributes)
  # Delegate before building anything, so no throwaway element is created
  # on the normal path.
  unless TABLE_INSERT_MODE_ELEMENTS.include?(@openElements[-1].name)
    return insertElementNormal(name, attributes)
  end

  element = @elementClass.new(name)
  element.attributes = attributes

  # We should be in the InTable mode. This means we want to do
  # special magic element rearranging.
  parent, insertBefore = getTableMisnestedNodePosition
  if insertBefore.nil?
    parent.appendChild(element)
  else
    parent.insertBefore(element, insertBefore)
  end
  @openElements.push(element)
  element
end
|
|
||||||
|
|
||||||
# Insert text into the tree.
#
# data   - the text to insert.
# parent - node receiving the text; when nil the current node (the last
#          entry of @openElements) is used.
#
# When table insertion mode is on (@insertFromTable) and the current node
# is one of TABLE_INSERT_MODE_ELEMENTS, the given parent is ignored and
# the text goes to the position reported by getTableMisnestedNodePosition.
def insertText(data, parent = nil)
  parent = @openElements[-1] if parent.nil?

  if @insertFromTable && TABLE_INSERT_MODE_ELEMENTS.include?(@openElements[-1].name)
    # We should be in the InTable mode. This means we want to do
    # special magic element rearranging.
    parent, insertBefore = getTableMisnestedNodePosition
    parent.insertText(data, insertBefore)
  else
    parent.insertText(data)
  end
end
|
|
||||||
|
# Get the foster parent element, and sibling to insert before
# (or nil) when inserting a misnested table node.
#
# The foster parent element is the one which comes before the most
# recently opened table element:
#   * no table on the stack     -> [first open element, nil]
#   * the table has a parent    -> [that parent, the table]
#   * the table has no parent   -> [the entry just below it on the stack, nil]
#
# Returns a two-element array [fosterParent, insertBefore].
def getTableMisnestedNodePosition
  # rindex gives the position of the table that was actually found, even
  # if an earlier stack entry compares equal to it.
  table_index = @openElements.rindex { |element| element.name == "table" }
  return @openElements[0], nil if table_index.nil?

  last_table = @openElements[table_index]
  # XXX - we should really check that this parent is actually a
  # node here
  return last_table.parent, last_table if last_table.parent

  [@openElements[table_index - 1], nil]
end
|
|
||||||
|
|
||||||
# Pop elements whose end tag may be implied off the stack of open
# elements.
#
# exclude - optional element name that must not be popped; popping stops
#           when the current node has this name.
#
# Elements are popped while the current node is one of dd, dt, li, p, td,
# th or tr (and is not exclude). Stops quietly if the stack runs empty
# instead of calling #name on nil. Returns nil.
def generateImpliedEndTags(exclude = nil)
  implied = %w[dd dt li p td th tr]

  # XXX This is not entirely what the specification says. We should
  # investigate it more closely.
  while (current = @openElements.last)
    name = current.name
    break if name == exclude || !implied.include?(name)
    @openElements.pop
  end
end
|
|
||||||
|
|
||||||
# Return the document currently held by the builder (@document).
def getDocument
  @document
end
|
||||||
|
|
||||||
# Build a fragment from the parsed content.
#
# Creates a new @fragmentClass instance, moves the children of the first
# open element into it via reparentChildren, and returns the fragment.
def getFragment
  # assert @innerHTML
  fragment = @fragmentClass.new
  @openElements[0].reparentChildren(fragment)
  fragment
end
|
||||||
|
|
||||||
# Serialize the subtree of node in the format required by unit tests.
#
# node - the node from which to start serializing.
#
# Abstract hook: this base version always raises NotImplementedError and
# is meant to be overridden by a concrete tree builder.
def testSerializer(node)
  raise NotImplementedError
end
|
||||||
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -3,209 +3,212 @@ require 'hpricot'
|
||||||
require 'forwardable'
|
require 'forwardable'
|
||||||
|
|
||||||
module HTML5lib
|
module HTML5lib
|
||||||
module TreeBuilders
|
module TreeBuilders
|
||||||
module Hpricot
|
module Hpricot
|
||||||
|
|
||||||
class Node < Base::Node
|
class Node < Base::Node
|
||||||
|
|
||||||
extend Forwardable
|
extend Forwardable
|
||||||
|
|
||||||
def_delegators :@hpricot, :name
|
def_delegators :@hpricot, :name
|
||||||
|
|
||||||
attr_accessor :hpricot
|
attr_accessor :hpricot
|
||||||
|
|
||||||
def initialize(name)
|
def initialize(name)
|
||||||
super(name)
|
super(name)
|
||||||
@hpricot = self.class.hpricot_class.new name
|
@hpricot = self.class.hpricot_class.new name
|
||||||
end
|
end
|
||||||
|
|
||||||
def appendChild(node)
|
def appendChild(node)
|
||||||
if node.kind_of?(TextNode) and childNodes.any? and childNodes.last.kind_of?(TextNode)
|
if node.kind_of?(TextNode) and childNodes.any? and childNodes.last.kind_of?(TextNode)
|
||||||
childNodes[-1].hpricot.content = childNodes[-1].hpricot.to_s + node.hpricot.to_s
|
childNodes[-1].hpricot.content = childNodes[-1].hpricot.to_s + node.hpricot.to_s
|
||||||
else
|
else
|
||||||
childNodes << node
|
childNodes << node
|
||||||
hpricot.children << node.hpricot
|
hpricot.children << node.hpricot
|
||||||
|
end
|
||||||
|
node.parent = self
|
||||||
end
|
end
|
||||||
node.parent = self
|
|
||||||
end
|
|
||||||
|
|
||||||
def removeChild(node)
|
def removeChild(node)
|
||||||
childNodes.delete(node)
|
childNodes.delete(node)
|
||||||
hpricot.children.delete_at(hpricot.children.index(node.hpricot))
|
hpricot.children.delete_at(hpricot.children.index(node.hpricot))
|
||||||
node.parent = nil
|
node.parent = nil
|
||||||
end
|
end
|
||||||
|
|
||||||
def insertText(data, before = nil)
|
def insertText(data, before=nil)
|
||||||
if before
|
if before
|
||||||
insertBefore(TextNode.new(data), before)
|
insertBefore(TextNode.new(data), before)
|
||||||
else
|
else
|
||||||
appendChild(TextNode.new(data))
|
appendChild(TextNode.new(data))
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
def insertBefore(node, refNode)
|
def insertBefore(node, refNode)
|
||||||
index = childNodes.index(refNode)
|
index = childNodes.index(refNode)
|
||||||
if node.kind_of?(TextNode) and index > 0 and childNodes[index-1].kind_of?(TextNode)
|
if node.kind_of?(TextNode) and index > 0 and childNodes[index-1].kind_of?(TextNode)
|
||||||
childNodes[index-1].hpricot.content = childNodes[index-1].hpricot.to_s + node.hpricot.to_s
|
childNodes[index-1].hpricot.content = childNodes[index-1].hpricot.to_s + node.hpricot.to_s
|
||||||
else
|
else
|
||||||
childNodes.insert(index, node)
|
childNodes.insert(index, node)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
def hasContent
|
def hasContent
|
||||||
childNodes.any?
|
childNodes.any?
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class Element < Node
|
class Element < Node
|
||||||
def self.hpricot_class
|
def self.hpricot_class
|
||||||
::Hpricot::Elem
|
::Hpricot::Elem
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize(name)
|
def initialize(name)
|
||||||
super(name)
|
super(name)
|
||||||
|
|
||||||
@hpricot = ::Hpricot::Elem.new(::Hpricot::STag.new(name))
|
@hpricot = ::Hpricot::Elem.new(::Hpricot::STag.new(name))
|
||||||
end
|
end
|
||||||
|
|
||||||
def name
|
def name
|
||||||
@hpricot.stag.name
|
@hpricot.stag.name
|
||||||
end
|
end
|
||||||
|
|
||||||
def cloneNode
|
def cloneNode
|
||||||
attributes.inject(self.class.new(name)) do |node, (name, value)|
|
attributes.inject(self.class.new(name)) do |node, (name, value)|
|
||||||
node.hpricot[name] = value
|
node.hpricot[name] = value
|
||||||
node
|
node
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
# A call to Hpricot::Elem#raw_attributes is built dynamically,
|
# A call to Hpricot::Elem#raw_attributes is built dynamically,
|
||||||
# so alterations to the returned value (a hash) will be lost.
|
# so alterations to the returned value (a hash) will be lost.
|
||||||
#
|
#
|
||||||
# AttributeProxy works around this by forwarding :[]= calls
|
# AttributeProxy works around this by forwarding :[]= calls
|
||||||
# to the raw_attributes accessor on the element start tag.
|
# to the raw_attributes accessor on the element start tag.
|
||||||
#
|
#
|
||||||
class AttributeProxy
|
class AttributeProxy
|
||||||
def initialize(hpricot)
|
def initialize(hpricot)
|
||||||
@hpricot = hpricot
|
@hpricot = hpricot
|
||||||
end
|
end
|
||||||
def []=(k, v)
|
|
||||||
|
def []=(k, v)
|
||||||
@hpricot.stag.send(stag_attributes_method)[k] = v
|
@hpricot.stag.send(stag_attributes_method)[k] = v
|
||||||
end
|
end
|
||||||
def stag_attributes_method
|
|
||||||
|
def stag_attributes_method
|
||||||
# STag#attributes changed to STag#raw_attributes after Hpricot 0.5
|
# STag#attributes changed to STag#raw_attributes after Hpricot 0.5
|
||||||
@hpricot.stag.respond_to?(:raw_attributes) ? :raw_attributes : :attributes
|
@hpricot.stag.respond_to?(:raw_attributes) ? :raw_attributes : :attributes
|
||||||
end
|
end
|
||||||
def method_missing(*a, &b)
|
|
||||||
|
def method_missing(*a, &b)
|
||||||
@hpricot.attributes.send(*a, &b)
|
@hpricot.attributes.send(*a, &b)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
def attributes
|
def attributes
|
||||||
AttributeProxy.new(@hpricot)
|
AttributeProxy.new(@hpricot)
|
||||||
end
|
end
|
||||||
|
|
||||||
def attributes=(attrs)
|
def attributes=(attrs)
|
||||||
attrs.each { |name, value| @hpricot[name] = value }
|
attrs.each { |name, value| @hpricot[name] = value }
|
||||||
end
|
end
|
||||||
|
|
||||||
def printTree(indent = 0)
|
def printTree(indent=0)
|
||||||
tree = "\n|#{' ' * indent}<#{name}>"
|
tree = "\n|#{' ' * indent}<#{name}>"
|
||||||
indent += 2
|
indent += 2
|
||||||
attributes.each do |name, value|
|
attributes.each do |name, value|
|
||||||
next if name == 'xmlns'
|
next if name == 'xmlns'
|
||||||
tree += "\n|#{' ' * indent}#{name}=\"#{value}\""
|
tree += "\n|#{' ' * indent}#{name}=\"#{value}\""
|
||||||
|
end
|
||||||
|
childNodes.inject(tree) { |tree, child| tree + child.printTree(indent) }
|
||||||
end
|
end
|
||||||
childNodes.inject(tree) { |tree, child| tree + child.printTree(indent) }
|
end
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class Document < Node
|
class Document < Node
|
||||||
def self.hpricot_class
|
def self.hpricot_class
|
||||||
::Hpricot::Doc
|
::Hpricot::Doc
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize
|
def initialize
|
||||||
super(nil)
|
super(nil)
|
||||||
end
|
end
|
||||||
|
|
||||||
def printTree(indent = 0)
|
def printTree(indent=0)
|
||||||
childNodes.inject('#document') { |tree, child| tree + child.printTree(indent + 2) }
|
childNodes.inject('#document') { |tree, child| tree + child.printTree(indent + 2) }
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class DocumentType < Node
|
class DocumentType < Node
|
||||||
def self.hpricot_class
|
def self.hpricot_class
|
||||||
::Hpricot::DocType
|
::Hpricot::DocType
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize(name)
|
def initialize(name)
|
||||||
begin
|
begin
|
||||||
super(name)
|
super(name)
|
||||||
rescue ArgumentError # needs 3...
|
rescue ArgumentError # needs 3...
|
||||||
|
end
|
||||||
|
|
||||||
|
@hpricot = ::Hpricot::DocType.new(name, nil, nil)
|
||||||
end
|
end
|
||||||
|
|
||||||
@hpricot = ::Hpricot::DocType.new(name, nil, nil)
|
def printTree(indent=0)
|
||||||
end
|
"\n|#{' ' * indent}<!DOCTYPE #{hpricot.target}>"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
class DocumentFragment < Element
|
||||||
|
def initialize
|
||||||
|
super('')
|
||||||
|
end
|
||||||
|
|
||||||
|
def printTree(indent=0)
|
||||||
|
childNodes.inject('') { |tree, child| tree + child.printTree(indent+2) }
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
class TextNode < Node
|
||||||
|
def initialize(data)
|
||||||
|
@hpricot = ::Hpricot::Text.new(data)
|
||||||
|
end
|
||||||
|
|
||||||
|
def printTree(indent=0)
|
||||||
|
"\n|#{' ' * indent}\"#{hpricot.content}\""
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
class CommentNode < Node
|
||||||
|
def self.hpricot_class
|
||||||
|
::Hpricot::Comment
|
||||||
|
end
|
||||||
|
|
||||||
|
def printTree(indent=0)
|
||||||
|
"\n|#{' ' * indent}<!-- #{hpricot.content} -->"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
class TreeBuilder < Base::TreeBuilder
|
||||||
|
def initialize
|
||||||
|
@documentClass = Document
|
||||||
|
@doctypeClass = DocumentType
|
||||||
|
@elementClass = Element
|
||||||
|
@commentClass = CommentNode
|
||||||
|
@fragmentClass = DocumentFragment
|
||||||
|
end
|
||||||
|
|
||||||
|
def testSerializer(node)
|
||||||
|
node.printTree
|
||||||
|
end
|
||||||
|
|
||||||
|
def getDocument
|
||||||
|
@document.hpricot
|
||||||
|
end
|
||||||
|
|
||||||
|
def getFragment
|
||||||
|
@document = super
|
||||||
|
return @document.hpricot.children
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
def printTree(indent = 0)
|
|
||||||
"\n|#{' ' * indent}<!DOCTYPE #{hpricot.target}>"
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class DocumentFragment < Element
|
|
||||||
def initialize
|
|
||||||
super('')
|
|
||||||
end
|
|
||||||
|
|
||||||
def printTree(indent = 0)
|
|
||||||
childNodes.inject('') { |tree, child| tree + child.printTree(indent+2) }
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class TextNode < Node
|
|
||||||
def initialize(data)
|
|
||||||
@hpricot = ::Hpricot::Text.new(data)
|
|
||||||
end
|
|
||||||
|
|
||||||
def printTree(indent = 0)
|
|
||||||
"\n|#{' ' * indent}\"#{hpricot.content}\""
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class CommentNode < Node
|
|
||||||
def self.hpricot_class
|
|
||||||
::Hpricot::Comment
|
|
||||||
end
|
|
||||||
|
|
||||||
def printTree(indent = 0)
|
|
||||||
"\n|#{' ' * indent}<!-- #{hpricot.content} -->"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class TreeBuilder < Base::TreeBuilder
|
|
||||||
def initialize
|
|
||||||
@documentClass = Document
|
|
||||||
@doctypeClass = DocumentType
|
|
||||||
@elementClass = Element
|
|
||||||
@commentClass = CommentNode
|
|
||||||
@fragmentClass = DocumentFragment
|
|
||||||
end
|
|
||||||
|
|
||||||
def testSerializer(node)
|
|
||||||
node.printTree
|
|
||||||
end
|
|
||||||
|
|
||||||
def getDocument
|
|
||||||
@document.hpricot
|
|
||||||
end
|
|
||||||
|
|
||||||
def getFragment
|
|
||||||
@document = super
|
|
||||||
return @document.hpricot.children
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -3,189 +3,189 @@ require 'rexml/document'
|
||||||
require 'forwardable'
|
require 'forwardable'
|
||||||
|
|
||||||
module HTML5lib
|
module HTML5lib
|
||||||
module TreeBuilders
|
module TreeBuilders
|
||||||
module REXMLTree
|
module REXMLTree
|
||||||
|
|
||||||
class Node < Base::Node
|
class Node < Base::Node
|
||||||
extend Forwardable
|
extend Forwardable
|
||||||
def_delegators :@rxobj, :name, :attributes
|
def_delegators :@rxobj, :name, :attributes
|
||||||
attr_accessor :rxobj
|
attr_accessor :rxobj
|
||||||
|
|
||||||
def initialize name
|
def initialize name
|
||||||
super name
|
super name
|
||||||
@rxobj = self.class.rxclass.new name
|
@rxobj = self.class.rxclass.new name
|
||||||
end
|
end
|
||||||
|
|
||||||
def appendChild node
|
def appendChild node
|
||||||
if node.kind_of? TextNode and
|
if node.kind_of? TextNode and
|
||||||
childNodes.length>0 and childNodes[-1].kind_of? TextNode
|
childNodes.length>0 and childNodes[-1].kind_of? TextNode
|
||||||
childNodes[-1].rxobj.value =
|
childNodes[-1].rxobj.value =
|
||||||
childNodes[-1].rxobj.to_s + node.rxobj.to_s
|
childNodes[-1].rxobj.to_s + node.rxobj.to_s
|
||||||
childNodes[-1].rxobj.raw = true
|
childNodes[-1].rxobj.raw = true
|
||||||
else
|
else
|
||||||
childNodes.push node
|
childNodes.push node
|
||||||
rxobj.add node.rxobj
|
rxobj.add node.rxobj
|
||||||
|
end
|
||||||
|
node.parent = self
|
||||||
end
|
end
|
||||||
node.parent = self
|
|
||||||
end
|
|
||||||
|
|
||||||
def removeChild node
|
def removeChild node
|
||||||
childNodes.delete node
|
childNodes.delete node
|
||||||
rxobj.delete node.rxobj
|
rxobj.delete node.rxobj
|
||||||
node.parent = nil
|
node.parent = nil
|
||||||
end
|
end
|
||||||
|
|
||||||
def insertText data, before=nil
|
def insertText data, before=nil
|
||||||
if before
|
if before
|
||||||
insertBefore TextNode.new(data), before
|
insertBefore TextNode.new(data), before
|
||||||
else
|
else
|
||||||
appendChild TextNode.new(data)
|
appendChild TextNode.new(data)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
def insertBefore node, refNode
|
def insertBefore node, refNode
|
||||||
index = childNodes.index(refNode)
|
index = childNodes.index(refNode)
|
||||||
if node.kind_of? TextNode and index>0 and
|
if node.kind_of? TextNode and index>0 and
|
||||||
childNodes[index-1].kind_of? TextNode
|
childNodes[index-1].kind_of? TextNode
|
||||||
childNodes[index-1].rxobj.value =
|
childNodes[index-1].rxobj.value =
|
||||||
childNodes[index-1].rxobj.to_s + node.rxobj.to_s
|
childNodes[index-1].rxobj.to_s + node.rxobj.to_s
|
||||||
childNodes[index-1].rxobj.raw = true
|
childNodes[index-1].rxobj.raw = true
|
||||||
else
|
else
|
||||||
childNodes.insert index, node
|
childNodes.insert index, node
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
def hasContent
|
def hasContent
|
||||||
return (childNodes.length > 0)
|
return (childNodes.length > 0)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class Element < Node
|
class Element < Node
|
||||||
def self.rxclass
|
def self.rxclass
|
||||||
REXML::Element
|
REXML::Element
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize name
|
def initialize name
|
||||||
super name
|
super name
|
||||||
end
|
end
|
||||||
|
|
||||||
def cloneNode
|
def cloneNode
|
||||||
newNode = self.class.new name
|
newNode = self.class.new name
|
||||||
attributes.each {|name,value| newNode.attributes[name] = value}
|
attributes.each {|name,value| newNode.attributes[name] = value}
|
||||||
newNode
|
newNode
|
||||||
end
|
end
|
||||||
|
|
||||||
def attributes= value
|
def attributes= value
|
||||||
value.each {|name,value| rxobj.attributes[name]=value}
|
value.each {|name, value| rxobj.attributes[name]=value}
|
||||||
end
|
end
|
||||||
|
|
||||||
def printTree indent=0
|
def printTree indent=0
|
||||||
tree = "\n|#{' ' * indent}<#{name}>"
|
tree = "\n|#{' ' * indent}<#{name}>"
|
||||||
indent += 2
|
indent += 2
|
||||||
for name, value in attributes
|
for name, value in attributes
|
||||||
next if name == 'xmlns'
|
next if name == 'xmlns'
|
||||||
tree += "\n|#{' ' * indent}#{name}=\"#{value}\""
|
tree += "\n|#{' ' * indent}#{name}=\"#{value}\""
|
||||||
end
|
end
|
||||||
for child in childNodes
|
for child in childNodes
|
||||||
tree += child.printTree(indent)
|
tree += child.printTree(indent)
|
||||||
|
end
|
||||||
|
return tree
|
||||||
end
|
end
|
||||||
return tree
|
end
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class Document < Node
|
class Document < Node
|
||||||
def self.rxclass
|
def self.rxclass
|
||||||
REXML::Document
|
REXML::Document
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize
|
def initialize
|
||||||
super nil
|
super nil
|
||||||
end
|
end
|
||||||
|
|
||||||
def appendChild node
|
def appendChild node
|
||||||
if node.kind_of? Element and node.name == 'html'
|
if node.kind_of? Element and node.name == 'html'
|
||||||
node.rxobj.add_namespace('http://www.w3.org/1999/xhtml')
|
node.rxobj.add_namespace('http://www.w3.org/1999/xhtml')
|
||||||
end
|
end
|
||||||
super node
|
super node
|
||||||
end
|
end
|
||||||
|
|
||||||
def printTree indent=0
|
def printTree indent=0
|
||||||
tree = "#document"
|
tree = "#document"
|
||||||
for child in childNodes
|
for child in childNodes
|
||||||
tree += child.printTree(indent + 2)
|
tree += child.printTree(indent + 2)
|
||||||
|
end
|
||||||
|
return tree
|
||||||
end
|
end
|
||||||
return tree
|
end
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class DocumentType < Node
|
class DocumentType < Node
|
||||||
def self.rxclass
|
def self.rxclass
|
||||||
REXML::DocType
|
REXML::DocType
|
||||||
end
|
end
|
||||||
|
|
||||||
def printTree indent=0
|
def printTree indent=0
|
||||||
"\n|#{' ' * indent}<!DOCTYPE #{name}>"
|
"\n|#{' ' * indent}<!DOCTYPE #{name}>"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class DocumentFragment < Element
|
class DocumentFragment < Element
|
||||||
def initialize
|
def initialize
|
||||||
super nil
|
super nil
|
||||||
end
|
end
|
||||||
|
|
||||||
def printTree indent=0
|
def printTree indent=0
|
||||||
tree = ""
|
tree = ""
|
||||||
for child in childNodes
|
for child in childNodes
|
||||||
tree += child.printTree(indent+2)
|
tree += child.printTree(indent+2)
|
||||||
|
end
|
||||||
|
return tree
|
||||||
end
|
end
|
||||||
return tree
|
end
|
||||||
|
|
||||||
|
class TextNode < Node
|
||||||
|
def initialize data
|
||||||
|
raw=data.gsub('&','&').gsub('<','<').gsub('>','>')
|
||||||
|
@rxobj = REXML::Text.new(raw, true, nil, true)
|
||||||
|
end
|
||||||
|
|
||||||
|
def printTree indent=0
|
||||||
|
"\n|#{' ' * indent}\"#{rxobj.value}\""
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
class CommentNode < Node
|
||||||
|
def self.rxclass
|
||||||
|
REXML::Comment
|
||||||
|
end
|
||||||
|
|
||||||
|
def printTree indent=0
|
||||||
|
"\n|#{' ' * indent}<!-- #{rxobj.string} -->"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
class TreeBuilder < Base::TreeBuilder
|
||||||
|
def initialize
|
||||||
|
@documentClass = Document
|
||||||
|
@doctypeClass = DocumentType
|
||||||
|
@elementClass = Element
|
||||||
|
@commentClass = CommentNode
|
||||||
|
@fragmentClass = DocumentFragment
|
||||||
|
end
|
||||||
|
|
||||||
|
def testSerializer node
|
||||||
|
node.printTree()
|
||||||
|
end
|
||||||
|
|
||||||
|
def getDocument
|
||||||
|
@document.rxobj
|
||||||
|
end
|
||||||
|
|
||||||
|
def getFragment
|
||||||
|
@document = super
|
||||||
|
return @document.rxobj.children
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class TextNode < Node
|
|
||||||
def initialize data
|
|
||||||
raw=data.gsub('&','&').gsub('<','<').gsub('>','>')
|
|
||||||
@rxobj = REXML::Text.new(raw, true, nil, true)
|
|
||||||
end
|
|
||||||
|
|
||||||
def printTree indent=0
|
|
||||||
"\n|#{' ' * indent}\"#{rxobj.value}\""
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class CommentNode < Node
|
|
||||||
def self.rxclass
|
|
||||||
REXML::Comment
|
|
||||||
end
|
|
||||||
|
|
||||||
def printTree indent=0
|
|
||||||
"\n|#{' ' * indent}<!-- #{rxobj.string} -->"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class TreeBuilder < Base::TreeBuilder
|
|
||||||
def initialize
|
|
||||||
@documentClass = Document
|
|
||||||
@doctypeClass = DocumentType
|
|
||||||
@elementClass = Element
|
|
||||||
@commentClass = CommentNode
|
|
||||||
@fragmentClass = DocumentFragment
|
|
||||||
end
|
|
||||||
|
|
||||||
def testSerializer node
|
|
||||||
node.printTree()
|
|
||||||
end
|
|
||||||
|
|
||||||
def getDocument
|
|
||||||
@document.rxobj
|
|
||||||
end
|
|
||||||
|
|
||||||
def getFragment
|
|
||||||
@document = super
|
|
||||||
return @document.rxobj.children
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,178 +1,178 @@
|
||||||
require 'html5lib/treebuilders/base'
|
require 'html5lib/treebuilders/base'
|
||||||
|
|
||||||
module HTML5lib
|
module HTML5lib
|
||||||
module TreeBuilders
|
module TreeBuilders
|
||||||
module SimpleTree
|
module SimpleTree
|
||||||
|
|
||||||
class Node < Base::Node
|
class Node < Base::Node
|
||||||
# Node representing an item in the tree.
|
# Node representing an item in the tree.
|
||||||
# name - The tag name associated with the node
|
# name - The tag name associated with the node
|
||||||
attr_accessor :name
|
attr_accessor :name
|
||||||
|
|
||||||
# The value of the current node (applies to text nodes and
|
# The value of the current node (applies to text nodes and
|
||||||
# comments
|
# comments
|
||||||
attr_accessor :value
|
attr_accessor :value
|
||||||
|
|
||||||
# a dict holding name, value pairs for attributes of the node
|
# a dict holding name, value pairs for attributes of the node
|
||||||
attr_accessor :attributes
|
attr_accessor :attributes
|
||||||
|
|
||||||
def initialize name
|
def initialize name
|
||||||
super
|
super
|
||||||
@name = name
|
@name = name
|
||||||
@value = nil
|
@value = nil
|
||||||
@attributes = {}
|
@attributes = {}
|
||||||
end
|
end
|
||||||
|
|
||||||
def appendChild node
|
def appendChild node
|
||||||
if node.kind_of? TextNode and
|
if node.kind_of? TextNode and
|
||||||
childNodes.length>0 and childNodes[-1].kind_of? TextNode
|
childNodes.length>0 and childNodes[-1].kind_of? TextNode
|
||||||
childNodes[-1].value += node.value
|
childNodes[-1].value += node.value
|
||||||
else
|
else
|
||||||
childNodes.push node
|
childNodes.push node
|
||||||
|
end
|
||||||
|
node.parent = self
|
||||||
end
|
end
|
||||||
node.parent = self
|
|
||||||
end
|
|
||||||
|
|
||||||
def removeChild node
|
def removeChild node
|
||||||
childNodes.delete node
|
childNodes.delete node
|
||||||
node.parent = nil
|
node.parent = nil
|
||||||
end
|
end
|
||||||
|
|
||||||
def cloneNode
|
def cloneNode
|
||||||
newNode = self.class.new name
|
newNode = self.class.new name
|
||||||
attributes.each {|name,value| newNode.attributes[name] = value}
|
attributes.each {|name,value| newNode.attributes[name] = value}
|
||||||
newNode.value = value
|
newNode.value = value
|
||||||
newNode
|
newNode
|
||||||
end
|
end
|
||||||
|
|
||||||
def insertText data, before=nil
|
def insertText data, before=nil
|
||||||
if before
|
if before
|
||||||
insertBefore TextNode.new(data), before
|
insertBefore TextNode.new(data), before
|
||||||
else
|
else
|
||||||
appendChild TextNode.new(data)
|
appendChild TextNode.new(data)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
def insertBefore node, refNode
|
def insertBefore node, refNode
|
||||||
index = childNodes.index(refNode)
|
index = childNodes.index(refNode)
|
||||||
if node.kind_of? TextNode and index>0 and
|
if node.kind_of? TextNode and index>0 and
|
||||||
childNodes[index-1].kind_of? TextNode
|
childNodes[index-1].kind_of? TextNode
|
||||||
childNodes[index-1].value += node.value
|
childNodes[index-1].value += node.value
|
||||||
else
|
else
|
||||||
childNodes.insert index, node
|
childNodes.insert index, node
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
def printTree indent=0
|
def printTree indent=0
|
||||||
tree = "\n|%s%s" % [' '* indent, self.to_s]
|
tree = "\n|%s%s" % [' '* indent, self.to_s]
|
||||||
for child in childNodes
|
for child in childNodes
|
||||||
tree += child.printTree(indent + 2)
|
tree += child.printTree(indent + 2)
|
||||||
|
end
|
||||||
|
return tree
|
||||||
end
|
end
|
||||||
return tree
|
|
||||||
end
|
|
||||||
|
|
||||||
def hasContent
|
def hasContent
|
||||||
return (childNodes.length > 0)
|
return (childNodes.length > 0)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class Element < Node
|
class Element < Node
|
||||||
def to_s
|
def to_s
|
||||||
"<%s>" % name
|
"<%s>" % name
|
||||||
end
|
end
|
||||||
|
|
||||||
def printTree indent=0
|
def printTree indent=0
|
||||||
tree = "\n|%s%s" % [' '* indent, self.to_s]
|
tree = "\n|%s%s" % [' '* indent, self.to_s]
|
||||||
indent += 2
|
indent += 2
|
||||||
for name, value in attributes
|
for name, value in attributes
|
||||||
tree += "\n|%s%s=\"%s\"" % [' ' * indent, name, value]
|
tree += "\n|%s%s=\"%s\"" % [' ' * indent, name, value]
|
||||||
end
|
end
|
||||||
for child in childNodes
|
for child in childNodes
|
||||||
tree += child.printTree(indent)
|
tree += child.printTree(indent)
|
||||||
|
end
|
||||||
|
return tree
|
||||||
end
|
end
|
||||||
return tree
|
end
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class Document < Node
|
class Document < Node
|
||||||
def to_s
|
def to_s
|
||||||
"#document"
|
"#document"
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize
|
def initialize
|
||||||
super nil
|
super nil
|
||||||
end
|
end
|
||||||
|
|
||||||
def printTree indent=0
|
def printTree indent=0
|
||||||
tree = to_s
|
tree = to_s
|
||||||
for child in childNodes
|
for child in childNodes
|
||||||
tree += child.printTree(indent + 2)
|
tree += child.printTree(indent + 2)
|
||||||
|
end
|
||||||
|
return tree
|
||||||
end
|
end
|
||||||
return tree
|
end
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class DocumentType < Node
|
class DocumentType < Node
|
||||||
def to_s
|
def to_s
|
||||||
"<!DOCTYPE %s>" % name
|
"<!DOCTYPE %s>" % name
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class DocumentFragment < Element
|
class DocumentFragment < Element
|
||||||
def initialize
|
def initialize
|
||||||
super nil
|
super nil
|
||||||
end
|
end
|
||||||
|
|
||||||
def printTree indent=0
|
def printTree indent=0
|
||||||
tree = ""
|
tree = ""
|
||||||
for child in childNodes
|
for child in childNodes
|
||||||
tree += child.printTree(indent+2)
|
tree += child.printTree(indent+2)
|
||||||
|
end
|
||||||
|
return tree
|
||||||
end
|
end
|
||||||
return tree
|
end
|
||||||
|
|
||||||
|
class TextNode < Node
|
||||||
|
def initialize value
|
||||||
|
super nil
|
||||||
|
@value = value
|
||||||
|
end
|
||||||
|
|
||||||
|
def to_s
|
||||||
|
'"%s"' % value
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
class CommentNode < Node
|
||||||
|
def initialize value
|
||||||
|
super nil
|
||||||
|
@value = value
|
||||||
|
end
|
||||||
|
|
||||||
|
def to_s
|
||||||
|
"<!-- %s -->" % value
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
class TreeBuilder < Base::TreeBuilder
|
||||||
|
def initialize
|
||||||
|
@documentClass = Document
|
||||||
|
@doctypeClass = DocumentType
|
||||||
|
@elementClass = Element
|
||||||
|
@commentClass = CommentNode
|
||||||
|
@fragmentClass = DocumentFragment
|
||||||
|
end
|
||||||
|
|
||||||
|
def testSerializer node
|
||||||
|
node.printTree()
|
||||||
|
end
|
||||||
|
|
||||||
|
def getFragment
|
||||||
|
@document = super
|
||||||
|
return @document.childNodes
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
class TextNode < Node
|
|
||||||
def initialize value
|
|
||||||
super nil
|
|
||||||
@value = value
|
|
||||||
end
|
|
||||||
|
|
||||||
def to_s
|
|
||||||
'"%s"' % value
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class CommentNode < Node
|
|
||||||
def initialize value
|
|
||||||
super nil
|
|
||||||
@value = value
|
|
||||||
end
|
|
||||||
|
|
||||||
def to_s
|
|
||||||
"<!-- %s -->" % value
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class TreeBuilder < Base::TreeBuilder
|
|
||||||
def initialize
|
|
||||||
@documentClass = Document
|
|
||||||
@doctypeClass = DocumentType
|
|
||||||
@elementClass = Element
|
|
||||||
@commentClass = CommentNode
|
|
||||||
@fragmentClass = DocumentFragment
|
|
||||||
end
|
|
||||||
|
|
||||||
def testSerializer node
|
|
||||||
node.printTree()
|
|
||||||
end
|
|
||||||
|
|
||||||
def getFragment
|
|
||||||
@document = super
|
|
||||||
return @document.childNodes
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
14
vendor/plugins/HTML5lib/tests/preamble.rb
vendored
14
vendor/plugins/HTML5lib/tests/preamble.rb
vendored
|
@ -7,5 +7,17 @@ $:.unshift File.join(File.dirname(File.dirname(__FILE__)),'lib')
|
||||||
$:.unshift File.dirname(__FILE__)
|
$:.unshift File.dirname(__FILE__)
|
||||||
|
|
||||||
def html5lib_test_files(subdirectory)
|
def html5lib_test_files(subdirectory)
|
||||||
Dir[File.join(HTML5LIB_BASE, 'tests', subdirectory, '*.*')]
|
Dir[File.join(HTML5LIB_BASE, 'tests', subdirectory, '*.*')]
|
||||||
|
end
|
||||||
|
|
||||||
|
begin
|
||||||
|
require 'jsonx'
|
||||||
|
rescue LoadError
|
||||||
|
class JSON
|
||||||
|
def self.parse json
|
||||||
|
json.gsub! /"\s*:/, '"=>'
|
||||||
|
json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
|
||||||
|
eval json
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -11,7 +11,7 @@ begin
|
||||||
def test_chardet
|
def test_chardet
|
||||||
File.open(File.join(HTML5LIB_BASE, 'tests', 'encoding', 'chardet', 'test_big5.txt')) do |file|
|
File.open(File.join(HTML5LIB_BASE, 'tests', 'encoding', 'chardet', 'test_big5.txt')) do |file|
|
||||||
stream = HTML5lib::HTMLInputStream.new(file, :chardet => true)
|
stream = HTML5lib::HTMLInputStream.new(file, :chardet => true)
|
||||||
assert_equal 'big5', stream.charEncoding.downcase
|
assert_equal 'big5', stream.char_encoding.downcase
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
rescue LoadError
|
rescue LoadError
|
||||||
|
@ -28,7 +28,7 @@ end
|
||||||
|
|
||||||
define_method 'test_%s_%d' % [ test_name, index + 1 ] do
|
define_method 'test_%s_%d' % [ test_name, index + 1 ] do
|
||||||
stream = HTML5lib::HTMLInputStream.new(input, :chardet => false)
|
stream = HTML5lib::HTMLInputStream.new(input, :chardet => false)
|
||||||
assert_equal encoding.downcase, stream.charEncoding.downcase, input
|
assert_equal encoding.downcase, stream.char_encoding.downcase, input
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
144
vendor/plugins/HTML5lib/tests/test_lxp.rb
vendored
144
vendor/plugins/HTML5lib/tests/test_lxp.rb
vendored
|
@ -6,19 +6,19 @@ XMLELEM = /<(\w+\s*)((?:[-:\w]+="[^"]*"\s*)+)(\/?)>/
|
||||||
SORTATTRS = '<#{$1+$2.split.sort.join(' ')+$3}>'
|
SORTATTRS = '<#{$1+$2.split.sort.join(' ')+$3}>'
|
||||||
|
|
||||||
def assert_xml_equal(input, expected=nil, parser=HTML5lib::XMLParser)
|
def assert_xml_equal(input, expected=nil, parser=HTML5lib::XMLParser)
|
||||||
document = parser.parse(input.chomp).root
|
document = parser.parse(input.chomp).root
|
||||||
if not expected
|
if not expected
|
||||||
expected = input.chomp.gsub(XMLELEM,SORTATTRS)
|
expected = input.chomp.gsub(XMLELEM,SORTATTRS)
|
||||||
expected = expected.gsub(/&#(\d+);/) {[$1.to_i].pack('U')}
|
expected = expected.gsub(/&#(\d+);/) {[$1.to_i].pack('U')}
|
||||||
output = document.to_s.gsub(/'/,'"').gsub(XMLELEM,SORTATTRS)
|
output = document.to_s.gsub(/'/,'"').gsub(XMLELEM,SORTATTRS)
|
||||||
assert_equal(expected, output)
|
assert_equal(expected, output)
|
||||||
else
|
else
|
||||||
assert_equal(expected, document.to_s.gsub(/'/,'"'))
|
assert_equal(expected, document.to_s.gsub(/'/,'"'))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def assert_xhtml_equal(input, expected=nil, parser=HTML5lib::XHTMLParser)
|
def assert_xhtml_equal(input, expected=nil, parser=HTML5lib::XHTMLParser)
|
||||||
assert_xml_equal(input, expected, parser)
|
assert_xml_equal(input, expected, parser)
|
||||||
end
|
end
|
||||||
|
|
||||||
class BasicXhtml5Test < Test::Unit::TestCase
|
class BasicXhtml5Test < Test::Unit::TestCase
|
||||||
|
@ -27,8 +27,8 @@ class BasicXhtml5Test < Test::Unit::TestCase
|
||||||
assert_xhtml_equal(
|
assert_xhtml_equal(
|
||||||
'<title>Xhtml</title><b><i>content</b></i>',
|
'<title>Xhtml</title><b><i>content</b></i>',
|
||||||
'<html xmlns="http://www.w3.org/1999/xhtml">' +
|
'<html xmlns="http://www.w3.org/1999/xhtml">' +
|
||||||
'<head><title>Xhtml</title></head>' +
|
'<head><title>Xhtml</title></head>' +
|
||||||
'<body><b><i>content</i></b></body>' +
|
'<body><b><i>content</i></b></body>' +
|
||||||
'</html>')
|
'</html>')
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -36,8 +36,8 @@ class BasicXhtml5Test < Test::Unit::TestCase
|
||||||
assert_xhtml_equal(
|
assert_xhtml_equal(
|
||||||
'<title>mdash</title>A &mdash B',
|
'<title>mdash</title>A &mdash B',
|
||||||
'<html xmlns="http://www.w3.org/1999/xhtml">' +
|
'<html xmlns="http://www.w3.org/1999/xhtml">' +
|
||||||
'<head><title>mdash</title></head>' +
|
'<head><title>mdash</title></head>' +
|
||||||
'<body>A '+ [0x2014].pack('U') + ' B</body>' +
|
'<body>A '+ [0x2014].pack('U') + ' B</body>' +
|
||||||
'</html>')
|
'</html>')
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -70,24 +70,24 @@ class OpmlTest < Test::Unit::TestCase
|
||||||
def test_mixedCaseElement
|
def test_mixedCaseElement
|
||||||
assert_xml_equal(
|
assert_xml_equal(
|
||||||
'<opml version="1.0">' +
|
'<opml version="1.0">' +
|
||||||
'<head><ownerName>Dave Winer</ownerName></head>' +
|
'<head><ownerName>Dave Winer</ownerName></head>' +
|
||||||
'</opml>')
|
'</opml>')
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_mixedCaseAttribute
|
def test_mixedCaseAttribute
|
||||||
assert_xml_equal(
|
assert_xml_equal(
|
||||||
'<opml version="1.0">' +
|
'<opml version="1.0">' +
|
||||||
'<body><outline isComment="true"/></body>' +
|
'<body><outline isComment="true"/></body>' +
|
||||||
'</opml>')
|
'</opml>')
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_malformed
|
def test_malformed
|
||||||
assert_xml_equal(
|
assert_xml_equal(
|
||||||
'<opml version="1.0">' +
|
'<opml version="1.0">' +
|
||||||
'<body><outline text="Odds & Ends"/></body>' +
|
'<body><outline text="Odds & Ends"/></body>' +
|
||||||
'</opml>',
|
'</opml>',
|
||||||
'<opml version="1.0">' +
|
'<opml version="1.0">' +
|
||||||
'<body><outline text="Odds & Ends"/></body>' +
|
'<body><outline text="Odds & Ends"/></body>' +
|
||||||
'</opml>')
|
'</opml>')
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -100,45 +100,45 @@ class XhtmlTest < Test::Unit::TestCase
|
||||||
<head><title>MathML</title></head>
|
<head><title>MathML</title></head>
|
||||||
<body>
|
<body>
|
||||||
<math xmlns="http://www.w3.org/1998/Math/MathML">
|
<math xmlns="http://www.w3.org/1998/Math/MathML">
|
||||||
|
<mrow>
|
||||||
|
<mi>x</mi>
|
||||||
|
<mo>=</mo>
|
||||||
|
|
||||||
|
<mfrac>
|
||||||
<mrow>
|
<mrow>
|
||||||
<mi>x</mi>
|
<mrow>
|
||||||
<mo>=</mo>
|
<mo>-</mo>
|
||||||
|
<mi>b</mi>
|
||||||
|
</mrow>
|
||||||
|
<mo>±</mo>
|
||||||
|
<msqrt>
|
||||||
|
|
||||||
<mfrac>
|
<mrow>
|
||||||
|
<msup>
|
||||||
|
<mi>b</mi>
|
||||||
|
<mn>2</mn>
|
||||||
|
</msup>
|
||||||
|
<mo>-</mo>
|
||||||
<mrow>
|
<mrow>
|
||||||
<mrow>
|
|
||||||
<mo>-</mo>
|
|
||||||
<mi>b</mi>
|
|
||||||
</mrow>
|
|
||||||
<mo>±</mo>
|
|
||||||
<msqrt>
|
|
||||||
|
|
||||||
<mrow>
|
<mn>4</mn>
|
||||||
<msup>
|
<mo>⁢</mo>
|
||||||
<mi>b</mi>
|
<mi>a</mi>
|
||||||
<mn>2</mn>
|
<mo>⁢</mo>
|
||||||
</msup>
|
<mi>c</mi>
|
||||||
<mo>-</mo>
|
|
||||||
<mrow>
|
|
||||||
|
|
||||||
<mn>4</mn>
|
|
||||||
<mo>⁢</mo>
|
|
||||||
<mi>a</mi>
|
|
||||||
<mo>⁢</mo>
|
|
||||||
<mi>c</mi>
|
|
||||||
</mrow>
|
|
||||||
</mrow>
|
|
||||||
|
|
||||||
</msqrt>
|
|
||||||
</mrow>
|
</mrow>
|
||||||
<mrow>
|
</mrow>
|
||||||
<mn>2</mn>
|
|
||||||
<mo>⁢</mo>
|
|
||||||
<mi>a</mi>
|
|
||||||
</mrow>
|
|
||||||
</mfrac>
|
|
||||||
|
|
||||||
|
</msqrt>
|
||||||
</mrow>
|
</mrow>
|
||||||
|
<mrow>
|
||||||
|
<mn>2</mn>
|
||||||
|
<mo>⁢</mo>
|
||||||
|
<mi>a</mi>
|
||||||
|
</mrow>
|
||||||
|
</mfrac>
|
||||||
|
|
||||||
|
</mrow>
|
||||||
</math>
|
</math>
|
||||||
</body></html>
|
</body></html>
|
||||||
EOX
|
EOX
|
||||||
|
@ -150,11 +150,11 @@ EOX
|
||||||
<head><title>SVG</title></head>
|
<head><title>SVG</title></head>
|
||||||
<body>
|
<body>
|
||||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
|
||||||
<path d="M38,38c0-12,24-15,23-2c0,9-16,13-16,23v7h11v-4c0-9,17-12,17-27
|
<path d="M38,38c0-12,24-15,23-2c0,9-16,13-16,23v7h11v-4c0-9,17-12,17-27
|
||||||
c-2-22-45-22-45,3zM45,70h11v11h-11z" fill="#371">
|
c-2-22-45-22-45,3zM45,70h11v11h-11z" fill="#371">
|
||||||
</path>
|
</path>
|
||||||
<circle cx="50" cy="50" r="45" fill="none" stroke="#371" stroke-width="10">
|
<circle cx="50" cy="50" r="45" fill="none" stroke="#371" stroke-width="10">
|
||||||
</circle>
|
</circle>
|
||||||
|
|
||||||
</svg>
|
</svg>
|
||||||
</body></html>
|
</body></html>
|
||||||
|
@ -167,24 +167,24 @@ EOX
|
||||||
<head><title>XLINK</title></head>
|
<head><title>XLINK</title></head>
|
||||||
<body>
|
<body>
|
||||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
|
||||||
<defs xmlns:l="http://www.w3.org/1999/xlink">
|
<defs xmlns:l="http://www.w3.org/1999/xlink">
|
||||||
<radialGradient id="s1" fx=".4" fy=".2" r=".7">
|
<radialGradient id="s1" fx=".4" fy=".2" r=".7">
|
||||||
<stop stop-color="#FE8"/>
|
<stop stop-color="#FE8"/>
|
||||||
<stop stop-color="#D70" offset="1"/>
|
<stop stop-color="#D70" offset="1"/>
|
||||||
</radialGradient>
|
</radialGradient>
|
||||||
<radialGradient id="s2" fx=".8" fy=".5" l:href="#s1"/>
|
<radialGradient id="s2" fx=".8" fy=".5" l:href="#s1"/>
|
||||||
<radialGradient id="s3" fx=".5" fy=".9" l:href="#s1"/>
|
<radialGradient id="s3" fx=".5" fy=".9" l:href="#s1"/>
|
||||||
<radialGradient id="s4" fx=".1" fy=".5" l:href="#s1"/>
|
<radialGradient id="s4" fx=".1" fy=".5" l:href="#s1"/>
|
||||||
</defs>
|
</defs>
|
||||||
<g stroke="#940">
|
<g stroke="#940">
|
||||||
<path d="M73,29c-37-40-62-24-52,4l6-7c-8-16,7-26,42,9z" fill="url(#s1)"/>
|
<path d="M73,29c-37-40-62-24-52,4l6-7c-8-16,7-26,42,9z" fill="url(#s1)"/>
|
||||||
<path d="M47,8c33-16,48,21,9,47l-6-5c38-27,20-44,5-37z" fill="url(#s2)"/>
|
<path d="M47,8c33-16,48,21,9,47l-6-5c38-27,20-44,5-37z" fill="url(#s2)"/>
|
||||||
<path d="M77,32c22,30,10,57-39,51l-1-8c3,3,67,5,36-36z" fill="url(#s3)"/>
|
<path d="M77,32c22,30,10,57-39,51l-1-8c3,3,67,5,36-36z" fill="url(#s3)"/>
|
||||||
|
|
||||||
<path d="M58,84c-4,20-38-4-8-24l-6-5c-36,43,15,56,23,27z" fill="url(#s4)"/>
|
<path d="M58,84c-4,20-38-4-8-24l-6-5c-36,43,15,56,23,27z" fill="url(#s4)"/>
|
||||||
<path d="M40,14c-40,37-37,52-9,68l1-8c-16-13-29-21,16-56z" fill="url(#s1)"/>
|
<path d="M40,14c-40,37-37,52-9,68l1-8c-16-13-29-21,16-56z" fill="url(#s1)"/>
|
||||||
<path d="M31,33c19,23,20,7,35,41l-9,1.7c-4-19-8-14-31-37z" fill="url(#s2)"/>
|
<path d="M31,33c19,23,20,7,35,41l-9,1.7c-4-19-8-14-31-37z" fill="url(#s2)"/>
|
||||||
</g>
|
</g>
|
||||||
</svg>
|
</svg>
|
||||||
</body></html>
|
</body></html>
|
||||||
EOX
|
EOX
|
||||||
|
|
164
vendor/plugins/HTML5lib/tests/test_parser.rb
vendored
164
vendor/plugins/HTML5lib/tests/test_parser.rb
vendored
|
@ -7,8 +7,8 @@ require 'html5lib/html5parser'
|
||||||
$tree_types_to_test = ['simpletree', 'rexml']
|
$tree_types_to_test = ['simpletree', 'rexml']
|
||||||
|
|
||||||
begin
|
begin
|
||||||
require 'hpricot'
|
require 'hpricot'
|
||||||
$tree_types_to_test.push('hpricot')
|
$tree_types_to_test.push('hpricot')
|
||||||
rescue LoadError
|
rescue LoadError
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -19,90 +19,90 @@ puts 'Testing: ' + $tree_types_to_test * ', '
|
||||||
|
|
||||||
class Html5ParserTestCase < Test::Unit::TestCase
|
class Html5ParserTestCase < Test::Unit::TestCase
|
||||||
|
|
||||||
def self.startswith?(a, b)
|
def self.startswith?(a, b)
|
||||||
b[0... a.length] == a
|
b[0... a.length] == a
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.parseTestcase(data)
|
def self.parseTestcase(data)
|
||||||
innerHTML = nil
|
innerHTML = nil
|
||||||
input = []
|
input = []
|
||||||
output = []
|
output = []
|
||||||
errors = []
|
errors = []
|
||||||
currentList = input
|
currentList = input
|
||||||
data.split(/\n/).each do |line|
|
data.split(/\n/).each do |line|
|
||||||
if !line.empty? and !startswith?("#errors", line) and
|
if !line.empty? and !startswith?("#errors", line) and
|
||||||
!startswith?("#document", line) and
|
!startswith?("#document", line) and
|
||||||
!startswith?("#data", line) and
|
!startswith?("#data", line) and
|
||||||
!startswith?("#document-fragment", line)
|
!startswith?("#document-fragment", line)
|
||||||
|
|
||||||
if currentList == output and startswith?("|", line)
|
if currentList == output and startswith?("|", line)
|
||||||
currentList.push(line[2..-1])
|
currentList.push(line[2..-1])
|
||||||
else
|
else
|
||||||
currentList.push(line)
|
currentList.push(line)
|
||||||
end
|
|
||||||
elsif line == "#errors"
|
|
||||||
currentList = errors
|
|
||||||
elsif line == "#document" or startswith?("#document-fragment", line)
|
|
||||||
if startswith?("#document-fragment", line)
|
|
||||||
innerHTML = line[19..-1]
|
|
||||||
raise AssertionError unless innerHTML
|
|
||||||
end
|
|
||||||
currentList = output
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
return innerHTML, input.join("\n"), output.join("\n"), errors
|
elsif line == "#errors"
|
||||||
end
|
currentList = errors
|
||||||
|
elsif line == "#document" or startswith?("#document-fragment", line)
|
||||||
# convert the output of str(document) to the format used in the testcases
|
if startswith?("#document-fragment", line)
|
||||||
def convertTreeDump(treedump)
|
innerHTML = line[19..-1]
|
||||||
treedump.split(/\n/)[1..-1].map { |line| (line.length > 2 and line[0] == ?|) ? line[3..-1] : line }.join("\n")
|
raise AssertionError unless innerHTML
|
||||||
end
|
|
||||||
|
|
||||||
def sortattrs(output)
|
|
||||||
output.gsub(/^(\s+)\w+=.*(\n\1\w+=.*)+/) { |match| match.split("\n").sort.join("\n") }
|
|
||||||
end
|
|
||||||
|
|
||||||
html5lib_test_files('tree-construction').each do |test_file|
|
|
||||||
|
|
||||||
test_name = File.basename(test_file).sub('.dat', '')
|
|
||||||
|
|
||||||
File.read(test_file).split("#data\n").each_with_index do |data, index|
|
|
||||||
next if data.empty?
|
|
||||||
|
|
||||||
innerHTML, input, expected_output, expected_errors = parseTestcase(data)
|
|
||||||
|
|
||||||
$tree_types_to_test.each do |tree_name|
|
|
||||||
define_method 'test_%s_%d_%s' % [ test_name, index + 1, tree_name ] do
|
|
||||||
|
|
||||||
parser = HTML5lib::HTMLParser.new(:tree => HTML5lib::TreeBuilders.getTreeBuilder(tree_name))
|
|
||||||
|
|
||||||
if innerHTML
|
|
||||||
parser.parseFragment(input, innerHTML)
|
|
||||||
else
|
|
||||||
parser.parse(input)
|
|
||||||
end
|
|
||||||
|
|
||||||
actual_output = convertTreeDump(parser.tree.testSerializer(parser.tree.document))
|
|
||||||
|
|
||||||
assert_equal sortattrs(expected_output), sortattrs(actual_output), [
|
|
||||||
'Input:', input,
|
|
||||||
'Expected:', expected_output,
|
|
||||||
'Recieved:', actual_output
|
|
||||||
].join("\n")
|
|
||||||
|
|
||||||
if $CHECK_PARSER_ERRORS
|
|
||||||
actual_errors = parser.errors.map do |(line, col), message|
|
|
||||||
'Line: %i Col: %i %s' % [line, col, message]
|
|
||||||
end
|
|
||||||
assert_equal parser.errors.length, expected_errors.length, [
|
|
||||||
'Expected errors:', expected_errors.join("\n"),
|
|
||||||
'Actual errors:', actual_errors.join("\n")
|
|
||||||
].join("\n")
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
currentList = output
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
return innerHTML, input.join("\n"), output.join("\n"), errors
|
||||||
|
end
|
||||||
|
|
||||||
|
# convert the output of str(document) to the format used in the testcases
|
||||||
|
def convertTreeDump(treedump)
|
||||||
|
treedump.split(/\n/)[1..-1].map { |line| (line.length > 2 and line[0] == ?|) ? line[3..-1] : line }.join("\n")
|
||||||
|
end
|
||||||
|
|
||||||
|
def sortattrs(output)
|
||||||
|
output.gsub(/^(\s+)\w+=.*(\n\1\w+=.*)+/) { |match| match.split("\n").sort.join("\n") }
|
||||||
|
end
|
||||||
|
|
||||||
|
html5lib_test_files('tree-construction').each do |test_file|
|
||||||
|
|
||||||
|
test_name = File.basename(test_file).sub('.dat', '')
|
||||||
|
|
||||||
|
File.read(test_file).split("#data\n").each_with_index do |data, index|
|
||||||
|
next if data.empty?
|
||||||
|
|
||||||
|
innerHTML, input, expected_output, expected_errors = parseTestcase(data)
|
||||||
|
|
||||||
|
$tree_types_to_test.each do |tree_name|
|
||||||
|
define_method 'test_%s_%d_%s' % [ test_name, index + 1, tree_name ] do
|
||||||
|
|
||||||
|
parser = HTML5lib::HTMLParser.new(:tree => HTML5lib::TreeBuilders.getTreeBuilder(tree_name))
|
||||||
|
|
||||||
|
if innerHTML
|
||||||
|
parser.parseFragment(input, innerHTML)
|
||||||
|
else
|
||||||
|
parser.parse(input)
|
||||||
|
end
|
||||||
|
|
||||||
|
actual_output = convertTreeDump(parser.tree.testSerializer(parser.tree.document))
|
||||||
|
|
||||||
|
assert_equal sortattrs(expected_output), sortattrs(actual_output), [
|
||||||
|
'Input:', input,
|
||||||
|
'Expected:', expected_output,
|
||||||
|
'Recieved:', actual_output
|
||||||
|
].join("\n")
|
||||||
|
|
||||||
|
if $CHECK_PARSER_ERRORS
|
||||||
|
actual_errors = parser.errors.map do |(line, col), message|
|
||||||
|
'Line: %i Col: %i %s' % [line, col, message]
|
||||||
|
end
|
||||||
|
assert_equal parser.errors.length, expected_errors.length, [
|
||||||
|
'Expected errors:', expected_errors.join("\n"),
|
||||||
|
'Actual errors:', actual_errors.join("\n")
|
||||||
|
].join("\n")
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -203,4 +203,8 @@ class SanitizeTest < Test::Unit::TestCase
|
||||||
sanitize_html(%(<img src='vbscript:msgbox("XSS")' />))
|
sanitize_html(%(<img src='vbscript:msgbox("XSS")' />))
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_should_handle_astral_plane_characters
|
||||||
|
assert_equal "<p>\360\235\222\265 \360\235\224\270</p>",
|
||||||
|
sanitize_html("<p>𝒵 𝔸</p>")
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
116
vendor/plugins/HTML5lib/tests/test_tokenizer.rb
vendored
116
vendor/plugins/HTML5lib/tests/test_tokenizer.rb
vendored
|
@ -4,75 +4,63 @@ require 'html5lib/tokenizer'
|
||||||
|
|
||||||
require 'tokenizer_test_parser'
|
require 'tokenizer_test_parser'
|
||||||
|
|
||||||
begin
|
|
||||||
require 'jsonx'
|
|
||||||
rescue LoadError
|
|
||||||
class JSON
|
|
||||||
def self.parse json
|
|
||||||
json.gsub! /"\s*:/, '"=>'
|
|
||||||
json.gsub!(/\\u[0-9a-fA-F]{4}/) {|x| [x[2..-1].to_i(16)].pack('U')}
|
|
||||||
eval json
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class Html5TokenizerTestCase < Test::Unit::TestCase
|
class Html5TokenizerTestCase < Test::Unit::TestCase
|
||||||
|
|
||||||
def type_of?(token_name, token)
|
def type_of?(token_name, token)
|
||||||
token != 'ParseError' and token_name == token.first
|
token != 'ParseError' and token_name == token.first
|
||||||
|
end
|
||||||
|
|
||||||
|
def convert_attribute_arrays_to_hashes(tokens)
|
||||||
|
tokens.inject([]) do |tokens, token|
|
||||||
|
token[2] = Hash[*token[2].reverse.flatten] if type_of?('StartTag', token)
|
||||||
|
tokens << token
|
||||||
end
|
end
|
||||||
|
end
|
||||||
def convert_attribute_arrays_to_hashes(tokens)
|
|
||||||
tokens.inject([]) do |tokens, token|
|
def concatenate_consecutive_characters(tokens)
|
||||||
token[2] = Hash[*token[2].reverse.flatten] if type_of?('StartTag', token)
|
tokens.inject([]) do |tokens, token|
|
||||||
tokens << token
|
if type_of?('Character', token) and tokens.any? and type_of?('Character', tokens.last)
|
||||||
end
|
tokens.last[1] = tokens.last[1] + token[1]
|
||||||
|
next tokens
|
||||||
|
end
|
||||||
|
tokens << token
|
||||||
end
|
end
|
||||||
|
end
|
||||||
def concatenate_consecutive_characters(tokens)
|
|
||||||
tokens.inject([]) do |tokens, token|
|
def tokenizer_test(data)
|
||||||
if type_of?('Character', token) and tokens.any? and type_of?('Character', tokens.last)
|
(data['contentModelFlags'] || [:PCDATA]).each do |content_model_flag|
|
||||||
tokens.last[1] = tokens.last[1] + token[1]
|
message = [
|
||||||
next tokens
|
'Description:', data['description'],
|
||||||
end
|
'Input:', data['input'],
|
||||||
tokens << token
|
'Content Model Flag:', content_model_flag ] * "\n"
|
||||||
end
|
|
||||||
end
|
assert_nothing_raised message do
|
||||||
|
tokenizer = HTML5lib::HTMLTokenizer.new(data['input'])
|
||||||
def tokenizer_test(data)
|
|
||||||
(data['contentModelFlags'] || [:PCDATA]).each do |content_model_flag|
|
tokenizer.contentModelFlag = content_model_flag.to_sym
|
||||||
message = [
|
|
||||||
'Description:', data['description'],
|
tokenizer.currentToken = {:type => :startTag, :name => data['lastStartTag']} if data.has_key?('lastStartTag')
|
||||||
'Input:', data['input'],
|
|
||||||
'Content Model Flag:', content_model_flag ] * "\n"
|
tokens = TokenizerTestParser.new(tokenizer).parse
|
||||||
|
|
||||||
assert_nothing_raised message do
|
actual = concatenate_consecutive_characters(convert_attribute_arrays_to_hashes(tokens))
|
||||||
tokenizer = HTML5lib::HTMLTokenizer.new(data['input'])
|
|
||||||
|
expected = concatenate_consecutive_characters(data['output'])
|
||||||
tokenizer.contentModelFlag = content_model_flag.to_sym
|
|
||||||
|
assert_equal expected, actual, message
|
||||||
tokenizer.currentToken = {:type => :startTag, :name => data['lastStartTag']} if data.has_key?('lastStartTag')
|
end
|
||||||
|
end
|
||||||
tokens = TokenizerTestParser.new(tokenizer).parse
|
end
|
||||||
|
|
||||||
actual = concatenate_consecutive_characters(convert_attribute_arrays_to_hashes(tokens))
|
html5lib_test_files('tokenizer').each do |test_file|
|
||||||
|
test_name = File.basename(test_file).sub('.test', '')
|
||||||
expected = concatenate_consecutive_characters(data['output'])
|
|
||||||
|
tests = JSON.parse(File.read(test_file))['tests']
|
||||||
assert_equal expected, actual, message
|
|
||||||
end
|
tests.each_with_index do |data, index|
|
||||||
end
|
define_method('test_%s_%d' % [test_name, index + 1]) { tokenizer_test data }
|
||||||
end
|
|
||||||
|
|
||||||
html5lib_test_files('tokenizer').each do |test_file|
|
|
||||||
test_name = File.basename(test_file).sub('.test', '')
|
|
||||||
|
|
||||||
tests = JSON.parse(File.read(test_file))['tests']
|
|
||||||
|
|
||||||
tests.each_with_index do |data, index|
|
|
||||||
define_method('test_%s_%d' % [test_name, index + 1]) { tokenizer_test data }
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -1,62 +1,62 @@
|
||||||
require 'html5lib/constants'
|
require 'html5lib/constants'
|
||||||
|
|
||||||
class TokenizerTestParser
|
class TokenizerTestParser
|
||||||
def initialize(tokenizer)
|
def initialize(tokenizer)
|
||||||
@tokenizer = tokenizer
|
@tokenizer = tokenizer
|
||||||
|
end
|
||||||
|
|
||||||
|
def parse
|
||||||
|
@outputTokens = []
|
||||||
|
|
||||||
|
debug = nil
|
||||||
|
for token in @tokenizer
|
||||||
|
debug = token.inspect if token[:type] == :ParseError
|
||||||
|
send ('process' + token[:type].to_s), token
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse
|
return @outputTokens
|
||||||
@outputTokens = []
|
end
|
||||||
|
|
||||||
debug = nil
|
def processDoctype(token)
|
||||||
for token in @tokenizer
|
@outputTokens.push(["DOCTYPE", token[:name], token[:data]])
|
||||||
debug = token.inspect if token[:type] == :ParseError
|
end
|
||||||
send ('process' + token[:type].to_s), token
|
|
||||||
end
|
|
||||||
|
|
||||||
return @outputTokens
|
def processStartTag(token)
|
||||||
|
@outputTokens.push(["StartTag", token[:name], token[:data]])
|
||||||
|
end
|
||||||
|
|
||||||
|
def processEmptyTag(token)
|
||||||
|
if not HTML5lib::VOID_ELEMENTS.include? token[:name]
|
||||||
|
@outputTokens.push("ParseError")
|
||||||
end
|
end
|
||||||
|
@outputTokens.push(["StartTag", token[:name], token[:data]])
|
||||||
|
end
|
||||||
|
|
||||||
def processDoctype(token)
|
def processEndTag(token)
|
||||||
@outputTokens.push(["DOCTYPE", token[:name], token[:data]])
|
if token[:data].length > 0
|
||||||
|
self.processParseError(token)
|
||||||
end
|
end
|
||||||
|
@outputTokens.push(["EndTag", token[:name]])
|
||||||
|
end
|
||||||
|
|
||||||
def processStartTag(token)
|
def processComment(token)
|
||||||
@outputTokens.push(["StartTag", token[:name], token[:data]])
|
@outputTokens.push(["Comment", token[:data]])
|
||||||
end
|
end
|
||||||
|
|
||||||
def processEmptyTag(token)
|
def processCharacters(token)
|
||||||
if not HTML5lib::VOID_ELEMENTS.include? token[:name]
|
@outputTokens.push(["Character", token[:data]])
|
||||||
@outputTokens.push("ParseError")
|
end
|
||||||
end
|
|
||||||
@outputTokens.push(["StartTag", token[:name], token[:data]])
|
|
||||||
end
|
|
||||||
|
|
||||||
def processEndTag(token)
|
alias processSpaceCharacters processCharacters
|
||||||
if token[:data].length > 0
|
|
||||||
self.processParseError(token)
|
|
||||||
end
|
|
||||||
@outputTokens.push(["EndTag", token[:name]])
|
|
||||||
end
|
|
||||||
|
|
||||||
def processComment(token)
|
def processCharacters(token)
|
||||||
@outputTokens.push(["Comment", token[:data]])
|
@outputTokens.push(["Character", token[:data]])
|
||||||
end
|
end
|
||||||
|
|
||||||
def processCharacters(token)
|
def processEOF(token)
|
||||||
@outputTokens.push(["Character", token[:data]])
|
end
|
||||||
end
|
|
||||||
|
|
||||||
alias processSpaceCharacters processCharacters
|
def processParseError(token)
|
||||||
|
@outputTokens.push("ParseError")
|
||||||
def processCharacters(token)
|
end
|
||||||
@outputTokens.push(["Character", token[:data]])
|
|
||||||
end
|
|
||||||
|
|
||||||
def processEOF(token)
|
|
||||||
end
|
|
||||||
|
|
||||||
def processParseError(token)
|
|
||||||
@outputTokens.push("ParseError")
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
Loading…
Add table
Reference in a new issue