5dd0507acc
Fixes to the html5lib sanitizer and maruku to support the SVG <foreignObject> element. Also update to the latest REXML.
1246 lines
44 KiB
Ruby
1246 lines
44 KiB
Ruby
require "rexml/parent"
|
|
require "rexml/namespace"
|
|
require "rexml/attribute"
|
|
require "rexml/cdata"
|
|
require "rexml/xpath"
|
|
require "rexml/parseexception"
|
|
|
|
module REXML
|
|
# An implementation note about namespaces:
|
|
# As we parse, when we find namespaces we put them in a hash and assign
|
|
# them a unique ID. We then convert the namespace prefix for the node
|
|
# to the unique ID. This makes namespace lookup much faster for the
|
|
# cost of extra memory use. We save the namespace prefix for the
|
|
# context node and convert it back when we write it.
|
|
# Module-level hash initialised empty; intended for the namespace
# bookkeeping described in the implementation note above.
# NOTE(review): nothing visible in this part of the file reads or writes
# it -- confirm it is still used before relying on it.
@@namespaces = {}
|
|
|
|
# Represents a tagged XML element. Elements are characterized by
|
|
# having children, attributes, and names, and can themselves be
|
|
# children.
|
|
class Element < Parent
|
|
include Namespace
|
|
|
|
UNDEFINED = "UNDEFINED"; # The default name
|
|
|
|
# Mechanisms for accessing attributes and child elements of this
|
|
# element.
|
|
attr_reader :attributes, :elements
|
|
# The context holds information about the processing environment, such as
|
|
# whitespace handling.
|
|
attr_accessor :context
|
|
|
|
# Constructor
|
|
# arg::
|
|
# if not supplied, will be set to the default value.
|
|
# If a String, the name of this object will be set to the argument.
|
|
# If an Element, the object will be shallowly cloned; name,
|
|
# attributes, and namespaces will be copied. Children will +not+ be
|
|
# copied.
|
|
# parent::
|
|
# if supplied, must be a Parent, and will be used as
|
|
# the parent of this object.
|
|
# context::
|
|
# If supplied, must be a hash containing context items. Context items
|
|
# include:
|
|
# * <tt>:respect_whitespace</tt> the value of this is :+all+ or an array of
|
|
# strings being the names of the elements to respect
|
|
# whitespace for. Defaults to :+all+.
|
|
# * <tt>:compress_whitespace</tt> the value can be :+all+ or an array of
|
|
# strings being the names of the elements to ignore whitespace on.
|
|
# Overrides :+respect_whitespace+.
|
|
# * <tt>:ignore_whitespace_nodes</tt> the value can be :+all+ or an array
|
|
# of strings being the names of the elements in which to ignore
|
|
# whitespace-only nodes. If this is set, Text nodes which contain only
|
|
# whitespace will not be added to the document tree.
|
|
# * <tt>:raw</tt> can be :+all+, or an array of strings being the names of
|
|
# the elements to process in raw mode. In raw mode, special
|
|
# characters in text are not converted to or from entities.
|
|
def initialize( arg = UNDEFINED, parent=nil, context=nil )
  # Parent's constructor receives the (optional) parent node.
  super(parent)

  @elements = Elements.new(self)
  @attributes = Attributes.new(self)
  @context = context

  case arg
  when String
    # A plain String is simply the element name.
    self.name = arg
  when Element
    # Shallow copy: expanded name, attributes and context are taken from
    # the source element; its children are not copied.
    self.name = arg.expanded_name
    arg.attributes.each_attribute do |source_attribute|
      @attributes << Attribute.new( source_attribute )
    end
    @context = arg.context
  end
end
|
|
|
|
# Short, single-line summary of this element: the opening tag with its
# attributes, followed by "> ... </>" when there are children and "/>"
# when there are none. Returns the summary String.
def inspect
  summary = "<#{@expanded_name}"

  @attributes.each_attribute { |attribute|
    summary << " "
    attribute.write( summary, 0 )
  }

  summary << (children.size > 0 ? "> ... </>" : "/>")
end
|
|
|
|
|
|
# Creates a shallow copy of self.
|
|
# d = Document.new "<a><b/><b/><c><d/></c></a>"
|
|
# new_a = d.root.clone
|
|
# puts new_a # => "<a/>"
|
|
def clone
  # Hand self to our own class's constructor so subclasses clone to
  # their own type.
  own_class = self.class
  own_class.new( self )
end
|
|
|
|
# Evaluates to the root node of the document that this element
|
|
# belongs to. If this element doesn't belong to a document, but does
|
|
# belong to another Element, the parent's root will be returned, until the
|
|
# earliest ancestor is found.
|
|
#
|
|
# Note that this is not the same as the document element.
|
|
# In the following example, <a> is the document element, and the root
|
|
# node is the parent node of the document element. You may ask yourself
|
|
# why the root node is useful: consider the doctype and XML declaration,
|
|
# and any processing instructions before the document element... they
|
|
# are children of the root node, or siblings of the document element.
|
|
# The only time this isn't true is when an Element is created that is
|
|
# not part of any Document. In this case, the ancestor that has no
|
|
# parent acts as the root node.
|
|
# d = Document.new '<a><b><c/></b></a>'
|
|
# a = d[1] ; c = a[1][1]
|
|
# d.root_node == d # TRUE
|
|
# a.root_node # namely, d
|
|
# c.root_node # again, d
|
|
def root_node
  # A node without a parent is its own root; otherwise defer upwards.
  return self if parent.nil?
  parent.root_node
end
|
|
|
|
# Returns the document element: for a Document, its first child element;
# for an element whose parent is a Document (or that has no parent),
# the element itself; otherwise whatever the parent reports as its root.
def root
  if kind_of?( Document )
    elements[1]
  elsif parent.kind_of?( Document ) || parent.nil?
    self
  else
    parent.root
  end
end
|
|
|
|
# Evaluates to the document to which this element belongs, or nil if this
|
|
# element doesn't belong to a document.
|
|
def document
  # The owning document, when there is one, is the parent of the root
  # element.
  top = root
  top ? top.parent : nil
end
|
|
|
|
# Evaluates to +true+ if whitespace is respected for this element. This
|
|
# is the case if:
|
|
# 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value
|
|
# 2. The context has :+respect_whitespace+ set to :+all+ or
|
|
# an array containing the name of this element, and
|
|
# :+compress_whitespace+ isn't set to :+all+ or an array containing the
|
|
# name of this element.
|
|
# The evaluation is tested against +expanded_name+, and so is namespace
|
|
# sensitive.
|
|
def whitespace
  @whitespace = nil
  if @context
    # :respect_whitespace decides first ...
    respect = @context[:respect_whitespace]
    if respect
      @whitespace = (respect == :all || respect.include?( expanded_name ))
    end
    # ... and :compress_whitespace overrides it for matching elements.
    compress = @context[:compress_whitespace]
    if compress && (compress == :all || compress.include?( expanded_name ))
      @whitespace = false
    end
  end
  # Anything that was not explicitly switched off counts as respected.
  @whitespace = true unless @whitespace == false
  @whitespace
end
|
|
|
|
# Evaluates to +true+ if whitespace-only Text nodes are to be ignored for
# this element, i.e. the context has :+ignore_whitespace_nodes+ set to
# :+all+ or to a collection that includes this element's +expanded_name+.
# Returns +false+ (never +nil+) when there is no context or the setting is
# absent; the result is also stored in @ignore_whitespace_nodes.
def ignore_whitespace_nodes
  setting = @context && @context[:ignore_whitespace_nodes]
  @ignore_whitespace_nodes =
    if setting
      setting == :all || setting.include?( expanded_name )
    else
      false
    end
end
|
|
|
|
# Evaluates to +true+ if raw mode is set for this element. This
|
|
# is the case if the context has :+raw+ set to :+all+ or
|
|
# an array containing the name of this element.
|
|
#
|
|
# The evaluation is tested against +expanded_name+, and so is namespace
|
|
# sensitive.
|
|
def raw
  # Falsy unless a context exists and its :raw entry is :all or includes
  # this element's expanded name.
  setting = @context && @context[:raw]
  @raw = setting && (setting == :all || setting.include?( expanded_name ))
  @raw
end
|
|
|
|
#once :whitespace, :raw, :ignore_whitespace_nodes
|
|
|
|
#################################################
|
|
# Namespaces #
|
|
#################################################
|
|
|
|
# Evaluates to an +Array+ containing the prefixes (names) of all defined
|
|
# namespaces at this context node.
|
|
# doc = Document.new("<a xmlns:x='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
|
|
# doc.elements['//b'].prefixes # -> ['x', 'y']
|
|
def prefixes
  # Start from whatever the parent already knows, then union in the
  # prefixes declared on this element's own attributes.
  inherited = parent ? parent.prefixes : []
  inherited | attributes.prefixes
end
|
|
|
|
# Returns a Hash of the namespace declarations in scope here: the
# parent's namespaces merged with (and overridden by) those declared on
# this element's attributes.
def namespaces
  inherited = parent ? parent.namespaces : {}
  inherited.merge( attributes.namespaces )
end
|
|
|
|
# Evaluates to the URI for a prefix, or the empty string if no such
|
|
# namespace is declared for this element. Evaluates recursively for
|
|
# ancestors. Returns the default namespace, if there is one.
|
|
# prefix::
|
|
# the prefix to search for. If not supplied, returns the default
|
|
# namespace if one exists
|
|
# Returns::
|
|
# the namespace URI as a String, or nil if no such namespace
|
|
# exists. If the namespace is undefined, returns an empty string
|
|
# doc = Document.new("<a xmlns='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
|
|
# b = doc.elements['//b']
|
|
# b.namespace # -> '1'
|
|
# b.namespace("y") # -> '2'
|
|
def namespace(prefix=nil)
  # With no argument, fall back to this element's own prefix.
  prefix = prefix() if prefix.nil?

  # Turn the prefix into the attribute name that would declare it.
  declaration =
    if prefix == ''
      "xmlns"
    elsif prefix[0,5] == 'xmlns'
      prefix
    else
      "xmlns:#{prefix}"
    end

  uri = attributes[ declaration ]
  # Not declared here: ask the ancestors.
  uri = parent.namespace( declaration ) if uri.nil? && parent
  # An undeclared default namespace is reported as the empty string.
  uri = '' if uri.nil? && declaration == 'xmlns'
  uri
end
|
|
|
|
# Adds a namespace to this element.
|
|
# prefix::
|
|
# the prefix string, or the namespace URI if +uri+ is not
|
|
# supplied
|
|
# uri::
|
|
# the namespace URI. May be nil, in which +prefix+ is used as
|
|
# the URI
|
|
# Evaluates to: this Element
|
|
# a = Element.new("a")
|
|
# a.add_namespace("xmlns:foo", "bar" )
|
|
# a.add_namespace("foo", "bar") # shorthand for previous line
|
|
# a.add_namespace("twiddle")
|
|
# puts a #-> <a xmlns:foo='bar' xmlns='twiddle'/>
|
|
def add_namespace( prefix, uri=nil )
  if uri
    # Accept both "foo" and "xmlns:foo" as the prefix.
    declaration = (prefix =~ /^xmlns:/) ? prefix : "xmlns:#{prefix}"
    @attributes[ declaration ] = uri
  else
    # Single-argument form: the argument is the default namespace URI.
    @attributes["xmlns"] = prefix
  end
  self
end
|
|
|
|
# Removes a namespace from this node. This only works if the namespace is
|
|
# actually declared in this node. If no argument is passed, deletes the
|
|
# default namespace.
|
|
#
|
|
# Evaluates to: this element
|
|
# doc = Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>"
|
|
# doc.root.delete_namespace
|
|
# puts doc # -> <a xmlns:foo='bar'/>
|
|
# doc.root.delete_namespace 'foo'
|
|
# puts doc # -> <a/>
|
|
def delete_namespace namespace="xmlns"
  # "xmlns" names the default namespace; anything else is a prefix.
  declaration_name = (namespace == 'xmlns') ? namespace : "xmlns:#{namespace}"
  declaration = attributes.get_attribute( declaration_name )
  declaration.remove unless declaration.nil?
  self
end
|
|
|
|
#################################################
|
|
# Elements #
|
|
#################################################
|
|
|
|
# Adds a child to this element, optionally setting attributes in
|
|
# the element.
|
|
# element::
|
|
# optional. If Element, the element is added.
|
|
# Otherwise, a new Element is constructed with the argument (see
|
|
# Element.initialize).
|
|
# attrs::
|
|
# If supplied, must be a Hash containing String name,value
|
|
# pairs, which will be used to set the attributes of the new Element.
|
|
# Returns:: the Element that was added
|
|
# el = doc.add_element 'my-tag'
|
|
# el = doc.add_element 'my-tag', {'attr1'=>'val1', 'attr2'=>'val2'}
|
|
# el = Element.new 'my-tag'
|
|
# doc.add_element el
|
|
def add_element element, attrs=nil
  if element.nil?
    raise "First argument must be either an element name, or an Element object"
  end
  added = @elements.add(element)
  # Attributes are only applied when given as a Hash; anything else is
  # ignored.
  if attrs.kind_of? Hash
    attrs.each { |key, value| added.attributes[key] = value }
  end
  added
end
|
|
|
|
# Deletes a child element.
|
|
# element::
|
|
# Must be an +Element+, +String+, or +Integer+. If Element,
|
|
# the element is removed. If String, the element is found (via XPath)
|
|
# and removed. <em>This means that any parent can remove any
|
|
# descendant.</em> If Integer, the Element indexed by that number will be
|
|
# removed.
|
|
# Returns:: the element that was removed.
|
|
# doc.delete_element "/a/b/c[@id='4']"
|
|
# doc.delete_element doc.elements["//k"]
|
|
# doc.delete_element 1
|
|
def delete_element element
  # Straight delegation to the Elements collection; its return value is
  # passed through.
  child_elements = @elements
  child_elements.delete( element )
end
|
|
|
|
# Evaluates to +true+ if this element has at least one child Element
|
|
# doc = Document.new "<a><b/><c>Text</c></a>"
|
|
# doc.root.has_elements # -> true
|
|
# doc.elements["/a/b"].has_elements # -> false
|
|
# doc.elements["/a/c"].has_elements # -> false
|
|
def has_elements?
  # True exactly when the Elements collection reports it is not empty.
  @elements.empty? ? false : true
end
|
|
|
|
# Iterates through the child elements, yielding for each Element that
|
|
# has a particular attribute set.
|
|
# key::
|
|
# the name of the attribute to search for
|
|
# value::
|
|
# the value of the attribute
|
|
# max::
|
|
# (optional) causes this method to return after yielding
|
|
# for this number of matching children
|
|
# name::
|
|
# (optional) if supplied, this is an XPath that filters
|
|
# the children to check.
|
|
#
|
|
# doc = Document.new "<a><b @id='1'/><c @id='2'/><d @id='1'/><e/></a>"
|
|
# # Yields b, c, d
|
|
# doc.root.each_element_with_attribute( 'id' ) {|e| p e}
|
|
# # Yields b, d
|
|
# doc.root.each_element_with_attribute( 'id', '1' ) {|e| p e}
|
|
# # Yields b
|
|
# doc.root.each_element_with_attribute( 'id', '1', 1 ) {|e| p e}
|
|
# # Yields d
|
|
# doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e}
|
|
def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element
  # With no value, any child carrying the attribute matches; otherwise
  # the attribute value has to equal +value+.
  matcher = proc { |child|
    found = child.attributes[key]
    value.nil? ? found != nil : found == value
  }
  each_with_something( matcher, max, name, &block )
end
|
|
|
|
# Iterates through the children, yielding for each Element that
|
|
# has a particular text set.
|
|
# text::
|
|
# the text to search for. If nil, or not supplied, will iterate
|
|
# over all +Element+ children that contain at least one +Text+ node.
|
|
# max::
|
|
# (optional) causes this method to return after yielding
|
|
# for this number of matching children
|
|
# name::
|
|
# (optional) if supplied, this is an XPath that filters
|
|
# the children to check.
|
|
#
|
|
# doc = Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>'
|
|
# # Yields b, c, d
|
|
# doc.each_element_with_text {|e|p e}
|
|
# # Yields b, c
|
|
# doc.each_element_with_text('b'){|e|p e}
|
|
# # Yields b
|
|
# doc.each_element_with_text('b', 1){|e|p e}
|
|
# # Yields d
|
|
# doc.each_element_with_text(nil, 0, 'd'){|e|p e}
|
|
def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element
  # With no text given, any child that has text matches; otherwise the
  # child's text has to equal +text+.
  matcher = proc { |child|
    text.nil? ? child.has_text? : child.text == text
  }
  each_with_something( matcher, max, name, &block )
end
|
|
|
|
# Synonym for Element.elements.each
|
|
def each_element( xpath=nil, &block ) # :yields: Element
  # Delegates to the Elements collection, forwarding the caller's block.
  child_elements = @elements
  child_elements.each( xpath, &block )
end
|
|
|
|
# Synonym for Element.to_a
|
|
# This is a little slower than calling elements.each directly.
|
|
# xpath:: any XPath by which to search for elements in the tree
|
|
# Returns:: an array of Elements that match the supplied path
|
|
def get_elements( xpath )
  # Delegates to Elements#to_a with the given path.
  child_elements = @elements
  child_elements.to_a( xpath )
end
|
|
|
|
# Returns the next sibling that is an element, or nil if there is
|
|
# no Element sibling after this one
|
|
# doc = Document.new '<a><b/>text<c/></a>'
|
|
# doc.root.elements['b'].next_element #-> <c/>
|
|
# doc.root.elements['c'].next_element #-> nil
|
|
def next_element
  candidate = next_sibling
  # Walk forward past non-Element siblings until an Element, or the end
  # of the sibling chain, is reached.
  loop do
    break if candidate.nil? || candidate.kind_of?( Element )
    candidate = candidate.next_sibling
  end
  candidate
end
|
|
|
|
# Returns the previous sibling that is an element, or nil if there is
|
|
# no Element sibling prior to this one
|
|
# doc = Document.new '<a><b/>text<c/></a>'
|
|
# doc.root.elements['c'].previous_element #-> <b/>
|
|
# doc.root.elements['b'].previous_element #-> nil
|
|
def previous_element
  candidate = previous_sibling
  # Walk backwards past non-Element siblings until an Element, or the
  # start of the sibling chain, is reached.
  loop do
    break if candidate.nil? || candidate.kind_of?( Element )
    candidate = candidate.previous_sibling
  end
  candidate
end
|
|
|
|
|
|
#################################################
|
|
# Text #
|
|
#################################################
|
|
|
|
# Evaluates to +true+ if this element has at least one Text child
|
|
def has_text?
  # There is text exactly when text() finds something.
  text().nil? ? false : true
end
|
|
|
|
# A convenience method which returns the String value of the _first_
|
|
# child text element, if one exists, and +nil+ otherwise.
|
|
#
|
|
# <em>Note that an element may have multiple Text elements, perhaps
|
|
# separated by other children</em>. Be aware that this method only returns
|
|
# the first Text node.
|
|
#
|
|
# This method returns the +value+ of the first text child node, which
|
|
# ignores the +raw+ setting, so always returns normalized text. See
|
|
# the Text::value documentation.
|
|
#
|
|
# doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
|
|
# # The element 'p' has two text elements, "some text " and " more text".
|
|
# doc.root.text #-> "some text "
|
|
def text( path = nil )
  # Look the Text node up via get_text and unwrap its value, if any.
  node = get_text(path)
  node.nil? ? nil : node.value
end
|
|
|
|
# Returns the first child Text node, if any, or +nil+ otherwise.
|
|
# This method returns the actual +Text+ node, rather than the String content.
|
|
# doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
|
|
# # The element 'p' has two text elements, "some text " and " more text".
|
|
# doc.root.get_text.value #-> "some text "
|
|
def get_text path = nil
  # No path: the first Text node among our own children.
  return @children.find { |node| node.kind_of? Text } unless path

  # With a path: the first Text child of the element the path selects,
  # or nil when nothing is selected.
  target = @elements[ path ]
  target.nil? ? nil : target.get_text
end
|
|
|
|
# Sets the first Text child of this object. See text() for a
|
|
# discussion about Text children.
|
|
#
|
|
# If a Text child already exists, the child is replaced by this
|
|
# content. This means that Text content can be deleted by calling
|
|
# this method with a nil argument. In this case, the next Text
|
|
# child becomes the first Text child. In no case is the order of
|
|
# any siblings disturbed.
|
|
# text::
|
|
# If a String, a new Text child is created and added to
|
|
# this Element as the first Text child. If Text, the text is set
|
|
# as the first Child element. If nil, then any existing first Text
|
|
# child is removed.
|
|
# Returns:: this Element.
|
|
# doc = Document.new '<a><b/></a>'
|
|
# doc.root.text = 'Sean' #-> '<a><b/>Sean</a>'
|
|
# doc.root.text = 'Elliott' #-> '<a><b/>Elliott</a>'
|
|
# doc.root.add_element 'c' #-> '<a><b/>Elliott<c/></a>'
|
|
# doc.root.text = 'Russell' #-> '<a><b/>Russell<c/></a>'
|
|
# doc.root.text = nil #-> '<a><b/><c/></a>'
|
|
def text=( text )
  # Strings are wrapped as-is; any other truthy non-Text value is wrapped
  # via to_s. Text instances and nil pass through untouched.
  wrap_source =
    if text.kind_of?( String )
      text
    elsif text && !text.kind_of?( Text )
      text.to_s
    end
  text = Text.new( wrap_source, whitespace(), nil, raw() ) if wrap_source

  existing = get_text
  if text.nil?
    # nil means "delete the first Text child", if there is one.
    existing.remove unless existing.nil?
  elsif existing.nil?
    self << text
  else
    # Replacing in place keeps the sibling order intact.
    existing.replace_with( text )
  end
  self
end
|
|
|
|
# A helper method to add a Text child. Actual Text instances can
|
|
# be added with regular Parent methods, such as add() and <<()
|
|
# text::
|
|
# if a String, a new Text instance is created and added
|
|
# to the parent. If Text, the object is added directly.
|
|
# Returns:: this Element
|
|
# e = Element.new('a') #-> <e/>
|
|
# e.add_text 'foo' #-> <e>foo</e>
|
|
# e.add_text Text.new(' bar') #-> <e>foo bar</e>
|
|
# Note that at the end of this example, the branch has <b>3</b> nodes; the 'e'
|
|
# element and <b>2</b> Text node children.
|
|
def add_text( text )
  if text.kind_of? String
    last_child = @children[-1]
    if last_child.kind_of? Text
      # Merge into the trailing Text node instead of creating a second
      # one. Return self here too, so every path honours the documented
      # "Returns:: this Element" contract (this branch used to return nil).
      last_child << text
      return self
    end
    text = Text.new( text, whitespace(), nil, raw() )
  end
  # nil is silently ignored; Text instances are appended as given.
  self << text unless text.nil?
  self
end
|
|
|
|
# Identifies this node as an element; always the symbol :element.
def node_type
  return :element
end
|
|
|
|
# Builds a "/"-joined path string for this element: one step per
# ancestor (outermost first), each produced by __to_xpath_helper.
def xpath
  node = self
  steps = [ __to_xpath_helper( node ) ]
  # Climb towards the root, prepending a step for each ancestor.
  while (ancestor = node.parent)
    steps.unshift( __to_xpath_helper( ancestor ) )
    node = ancestor
  end
  steps.join( "/" )
end
|
|
|
|
#################################################
|
|
# Attributes #
|
|
#################################################
|
|
|
|
# Looks up an Attribute by name, optionally restricted to a namespace URI.
# name:: the attribute's (unprefixed) name
# namespace:: optional namespace URI; when given, it is mapped back to the
#             prefix under which it is declared in the in-scope namespaces
# Returns:: whatever attributes.get_attribute returns for the match, or
#           nil when nothing matches
def attribute( name, namespace=nil )
  # namespaces is rebuilt on every call, so compute it once and reuse it
  # for every lookup below.
  in_scope = namespaces

  prefix = nil
  if namespace
    # Hash#key where available, Hash#index on Rubies that lack it.
    prefix = in_scope.respond_to?( :key ) ? in_scope.key( namespace ) : in_scope.index( namespace )
  end
  # The default namespace declaration carries no attribute prefix.
  prefix = nil if prefix == 'xmlns'

  qualified_name = prefix ? "#{prefix}:#{name}" : "#{name}"
  found = attributes.get_attribute( qualified_name )

  return found unless found.nil?
  return nil if prefix.nil?

  # Only fall back to the unprefixed attribute when the prefix is bound
  # to the same URI as the default namespace.
  return nil unless in_scope[ prefix ] == in_scope[ 'xmlns' ]

  attributes.get_attribute( name )
end
|
|
|
|
# Evaluates to +true+ if this element has any attributes set, false
|
|
# otherwise.
|
|
def has_attributes?
  # True exactly when the attribute collection is not empty.
  @attributes.empty? ? false : true
end
|
|
|
|
# Adds an attribute to this element, overwriting any existing attribute
|
|
# by the same name.
|
|
# key::
|
|
# can be either an Attribute or a String. If an Attribute,
|
|
# the attribute is added to the list of Element attributes. If String,
|
|
# the argument is used as the name of the new attribute, and the value
|
|
# parameter must be supplied.
|
|
# value::
|
|
# Required if +key+ is a String, and ignored if the first argument is
|
|
# an Attribute. This is a String, and is used as the value
|
|
# of the new Attribute. This should be the unnormalized value of the
|
|
# attribute (without entities).
|
|
# Returns:: the Attribute added
|
|
# e = Element.new 'e'
|
|
# e.add_attribute( 'a', 'b' ) #-> <e a='b'/>
|
|
# e.add_attribute( 'x:a', 'c' ) #-> <e a='b' x:a='c'/>
|
|
# e.add_attribute Attribute.new('b', 'd') #-> <e a='b' x:a='c' b='d'/>
|
|
def add_attribute( key, value=nil )
  # A ready-made Attribute is appended as is ...
  return @attributes << key if key.kind_of? Attribute
  # ... anything else is treated as a name/value pair.
  @attributes[key] = value
end
|
|
|
|
# Add multiple attributes to this element.
|
|
# hash:: is either a hash, or array of arrays
|
|
# el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} )
|
|
# el.add_attributes( [ ["name1","value1"], ["name2","value2"] ] )
|
|
def add_attributes hash
  case hash
  when Hash
    hash.each_pair { |name, value| @attributes[name] = value }
  when Array
    # Each entry is expected to be a [name, value] pair.
    hash.each { |pair| @attributes[ pair[0] ] = pair[1] }
  end
  # Any other argument type is silently ignored.
end
|
|
|
|
# Removes an attribute
|
|
# key::
|
|
# either an Attribute or a String. In either case, the
|
|
# attribute is found by matching the attribute name to the argument,
|
|
# and then removed. If no attribute is found, no action is taken.
|
|
# Returns::
|
|
# the attribute removed, or nil if this Element did not contain
|
|
# a matching attribute
|
|
# e = Element.new('E')
|
|
# e.add_attribute( 'name', 'Sean' ) #-> <E name='Sean'/>
|
|
# r = e.add_attribute( 'sur:name', 'Russell' ) #-> <E name='Sean' sur:name='Russell'/>
|
|
# e.delete_attribute( 'name' ) #-> <E sur:name='Russell'/>
|
|
# e.delete_attribute( r ) #-> <E/>
|
|
def delete_attribute(key)
  target = @attributes.get_attribute(key)
  # Nothing to do (and nil is returned) when no attribute matches.
  target.nil? ? nil : target.remove
end
|
|
|
|
#################################################
|
|
# Other Utilities #
|
|
#################################################
|
|
|
|
# Get an array of all CData children.
|
|
# IMMUTABLE
|
|
def cdatas
  # Collect the CData children, then freeze the result array.
  matches = find_all { |node| node.kind_of?( CData ) }
  matches.freeze
end
|
|
|
|
# Get an array of all Comment children.
|
|
# IMMUTABLE
|
|
def comments
  # Collect the Comment children, then freeze the result array.
  matches = find_all { |node| node.kind_of?( Comment ) }
  matches.freeze
end
|
|
|
|
# Get an array of all Instruction children.
|
|
# IMMUTABLE
|
|
def instructions
  # Collect the Instruction children, then freeze the result array.
  matches = find_all { |node| node.kind_of?( Instruction ) }
  matches.freeze
end
|
|
|
|
# Get an array of all Text children.
|
|
# IMMUTABLE
|
|
def texts
  # Collect the Text children, then freeze the result array.
  matches = find_all { |node| node.kind_of?( Text ) }
  matches.freeze
end
|
|
|
|
# == DEPRECATED
|
|
# See REXML::Formatters
|
|
#
|
|
# Writes out this element, and recursively, all children.
|
|
# output::
|
|
# output an object which supports '<< string'; this is where the
|
|
# document will be written.
|
|
# indent::
|
|
# An integer. If -1, no indenting will be used; otherwise, the
|
|
# indentation will be this number of spaces, and children will be
|
|
# indented an additional amount. Defaults to -1
|
|
# transitive::
|
|
# If transitive is true and indent is >= 0, then the output will be
|
|
# pretty-printed in such a way that the added whitespace does not affect
|
|
# the parse tree of the document
|
|
# ie_hack::
|
|
# Internet Explorer is the worst piece of crap to have ever been
|
|
# written, with the possible exception of Windows itself. Since IE is
|
|
# unable to parse proper XML, we have to provide a hack to generate XML
|
|
# that IE's limited abilities can handle. This hack inserts a space
|
|
# before the /> on empty tags. Defaults to false
|
|
#
|
|
# out = ''
|
|
# doc.write( out ) #-> doc is written to the string 'out'
|
|
# doc.write( $stdout ) #-> doc written to the console
|
|
def write(output=$stdout, indent=-1, transitive=false, ie_hack=false)
  Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
  # Pick the formatter: Default when no indentation is requested,
  # otherwise Transitive or Pretty depending on +transitive+.
  formatter =
    if indent > -1
      transitive ? REXML::Formatters::Transitive.new( indent, ie_hack ) : REXML::Formatters::Pretty.new( indent, ie_hack )
    else
      REXML::Formatters::Default.new( ie_hack )
    end
  formatter.write( self, output )
end
|
|
|
|
|
|
private
|
|
# Builds one path step for +node+: a copy of its expanded name, plus a
# 1-based "[n]" position when the parent holds more than one element
# with the same expanded name.
def __to_xpath_helper node
  step = node.expanded_name.clone
  container = node.parent
  return step unless container

  same_named = container.find_all { |sibling|
    sibling.kind_of?(REXML::Element) and sibling.expanded_name == node.expanded_name
  }
  # The index is only needed to disambiguate between several same-named
  # siblings.
  step << "[#{same_named.index( node ) + 1}]" if same_named.length > 1
  step
end
|
|
|
|
# A private helper method
|
|
def each_with_something( test, max=0, name=nil )
  matched = 0
  @elements.each( name ) do |child|
    # Count the match before yielding it to the caller's block.
    if test.call( child )
      matched += 1
      yield child
    end
    # A positive +max+ caps the number of matches; 0 means no limit.
    return if max > 0 && matched == max
  end
end
|
|
end
|
|
|
|
########################################################################
|
|
# ELEMENTS #
|
|
########################################################################
|
|
|
|
# A class which provides filtering of children for Elements, and
|
|
# XPath search support. You are expected to only encounter this class as
|
|
# the <tt>element.elements</tt> object. Therefore, you are
|
|
# _not_ expected to instantiate this yourself.
|
|
class Elements
|
|
include Enumerable
|
|
# Constructor
|
|
# parent:: the parent Element
|
|
def initialize( parent )
  # Keep the element whose children this collection filters.
  @element = parent
end
|
|
|
|
# Fetches a child element. Filters only Element children, regardless of
|
|
# the XPath match.
|
|
# index::
|
|
# the search parameter. This is either an Integer, which
|
|
# will be used to find the index'th child Element, or an XPath,
|
|
# which will be used to search for the Element. <em>Because
|
|
# of the nature of XPath searches, any element in the connected XML
|
|
# document can be fetched through any other element.</em> <b>The
|
|
# Integer index is 1-based, not 0-based.</b> This means that the first
|
|
# child element is at index 1, not 0, and the +n+th element is at index
|
|
# +n+, not <tt>n-1</tt>. This is because XPath indexes element children
|
|
# starting from 1, not 0, and the indexes should be the same.
|
|
# name::
|
|
# optional, and only used if the first argument is an
|
|
# Integer. In that case, the index'th child Element that has the
|
|
# supplied name will be returned. Note again that the indexes start at 1.
|
|
# Returns:: the first matching Element, or nil if no child matched
|
|
# doc = Document.new '<a><b/><c id="1"/><c id="2"/><d/></a>'
|
|
# doc.root.elements[1] #-> <b/>
|
|
# doc.root.elements['c'] #-> <c id="1"/>
|
|
# doc.root.elements[2,'c'] #-> <c id="2"/>
|
|
def []( index, name=nil)
  # Anything that is not an Integer is treated as an XPath.
  return XPath::first( @element, index ) unless index.kind_of? Integer

  raise "index (#{index}) must be >= 1" if index < 1
  wanted = name ? literalize(name) : name
  seen = 0
  # Count only Element children (optionally only those with the wanted
  # name) and stop at the index'th one; indexes are 1-based.
  @element.find do |child|
    next false unless child.kind_of? Element
    next false unless wanted.nil? || child.has_name?( wanted )
    (seen += 1) == index
  end
end
|
|
|
|
# Sets an element, replacing any previous matching element. If no
|
|
# existing element is found, the element is added.
|
|
# index:: Used to find a matching element to replace. See []().
|
|
# element::
|
|
# The element to replace the existing element with
|
|
# Returns:: the previous element, or nil if no previous element
|
|
# was found.
|
|
#
|
|
# doc = Document.new '<a/>'
|
|
# doc.root.elements[10] = Element.new('b') #-> <a><b/></a>
|
|
# doc.root.elements[1] #-> <b/>
|
|
# doc.root.elements[1] = Element.new('c') #-> <a><c/></a>
|
|
# doc.root.elements['c'] = Element.new('d') #-> <a><d/></a>
|
|
def []=( index, element )
  existing = self[index]
  # Nothing at that index/path: append; otherwise swap in place.
  existing.nil? ? @element.add( element ) : existing.replace_with( element )
  existing
end
|
|
|
|
# Returns +true+ if there are no +Element+ children, +false+ otherwise
|
|
def empty?
  # Empty means: no child of the owning element is an Element.
  first_element = @element.find { |child| child.kind_of? Element }
  first_element.nil?
end
|
|
|
|
# Returns the index of the supplied child (starting at 1), or -1 if
|
|
# the element is not a child
|
|
# element:: an +Element+ child
|
|
def index element
  position = 0
  # Count Element children until the requested one is reached.
  hit = @element.find do |child|
    next false unless child.kind_of? Element
    position += 1
    child == element
  end
  hit == element ? position : -1
end
|
|
|
|
# Deletes a child Element
|
|
# element::
|
|
# Either an Element, which is removed directly; an
|
|
# xpath, where the first matching child is removed; or an Integer,
|
|
# where the n'th Element is removed.
|
|
# Returns:: the removed child
|
|
# doc = Document.new '<a><b/><c/><c id="1"/></a>'
|
|
# b = doc.root.elements[1]
|
|
# doc.root.elements.delete b #-> <a><c/><c id="1"/></a>
|
|
# doc.elements.delete("a/c[@id='1']") #-> <a><c/></a>
|
|
# doc.root.elements.delete 1 #-> <a/>
|
|
def delete element
  # An Element is removed from the owning element directly ...
  return @element.delete( element ) if element.kind_of? Element
  # ... an index or XPath is resolved first, then removed if found.
  located = self[element]
  located.remove if located
end
|
|
|
|
# Removes multiple elements. Filters for Element children, regardless of
|
|
# XPath matching.
|
|
# xpath:: all elements matching this String path are removed.
|
|
# Returns:: an Array of Elements that have been removed
|
|
# doc = Document.new '<a><c/><c/><c/><c/></a>'
|
|
# deleted = doc.elements.delete_all 'a/c' #-> [<c/>, <c/>, <c/>, <c/>]
|
|
def delete_all( xpath )
  # Collect the matching elements first, then remove them, so nothing is
  # deleted while the XPath iteration is still running.
  doomed = []
  XPath::each( @element, xpath ) do |node|
    doomed << node if node.kind_of? Element
  end
  doomed.each { |node|
    @element.delete node
    node.remove
  }
  doomed
end
|
|
|
|
# Adds an element
|
|
# element::
|
|
# if supplied, is either an Element, String, or
|
|
# Source (see Element.initialize). If not supplied or nil, a
|
|
# new, default Element will be constructed
|
|
# Returns:: the added Element
|
|
# a = Element.new('a')
|
|
# a.elements.add(Element.new('b')) #-> <a><b/></a>
|
|
# a.elements.add('c') #-> <a><b/><c/></a>
|
|
def add element=nil
  unless element.kind_of?(Element)
    # nil builds an element with an empty name; any other non-Element
    # value is handed to Element.new as-is. This collection (self) is
    # passed as the new element's parent argument.
    return Element.new( element.nil? ? "" : element, self, @element.context )
  end
  # An existing Element is appended and adopts the owner's context.
  @element << element
  element.context = @element.context
  element
end
|
|
|
|
alias :<< :add
|
|
|
|
# Iterates through all of the child Elements, optionally filtering
|
|
# them by a given XPath
|
|
# xpath::
|
|
# optional. If supplied, this is a String XPath, and is used to
|
|
# filter the children, so that only matching children are yielded. Note
|
|
# that XPaths are automatically filtered for Elements, so that
|
|
# non-Element children will not be yielded
|
|
# doc = Document.new '<a><b/><c/><d/>sean<b/><c/><d/></a>'
|
|
# doc.root.each {|e|p e} #-> Yields b, c, d, b, c, d elements
|
|
# doc.root.each('b') {|e|p e} #-> Yields b, b elements
|
|
# doc.root.each('child::node()') {|e|p e}
|
|
# #-> Yields <b/>, <c/>, <d/>, <b/>, <c/>, <d/>
|
|
# XPath.each(doc.root, 'child::node()', &block)
|
|
# #-> Yields <b/>, <c/>, <d/>, sean, <b/>, <c/>, <d/>
|
|
def each( xpath=nil, &block)
  XPath::each( @element, xpath ) do |node|
    # Non-Element matches are skipped.
    next unless node.kind_of? Element
    yield node
  end
end
|
|
|
|
# Maps the block over the Element nodes matched by +xpath+ and returns
# the block results as an Array; non-Element matches are skipped.
def collect( xpath=nil, &block )
  results = []
  XPath::each( @element, xpath ) do |node|
    next unless node.kind_of?(Element)
    results << yield(node)
  end
  results
end
|
|
|
|
# Folds the Elements matched by +xpath+ into a single value.
# xpath::
#   optional; handed to XPath::each as given. Matches that are not
#   Elements are ignored.
# initial::
#   optional starting accumulator. When omitted (or nil) the first matched
#   Element becomes the accumulator and is not passed to the block.
# Returns::
#   the final accumulator; +initial+ itself when nothing matched.
#
# The block receives (accumulator, element) and its return value becomes
# the next accumulator -- including when that value is nil.
def inject( xpath=nil, initial=nil, &block )
  # Seeding is decided once, up front. Re-checking "initial == nil" on every
  # iteration would mistake a nil returned by the block for "no accumulator
  # yet" and silently swallow the next element.
  seeded = !initial.nil?
  XPath::each( @element, xpath ) do |e|
    next unless e.kind_of?( Element )
    if seeded
      initial = yield( initial, e )
    else
      initial = e
      seeded = true
    end
  end
  initial
end
|
|
|
|
# Returns the number of +Element+ children of the parent object.
|
|
# doc = Document.new '<a>sean<b/>elliott<b/>russell<b/></a>'
|
|
# doc.root.size #-> 6, 3 element and 3 text nodes
|
|
# doc.root.elements.size #-> 3
|
|
def size
  # Walks the owning element's children and tallies only the Elements.
  total = 0
  @element.each do |node|
    next unless node.kind_of?( Element )
    total += 1
  end
  total
end
|
|
|
|
# Returns an Array of Element children. An XPath may be supplied to
|
|
# filter the children. Only Element children are returned, even if the
|
|
# supplied XPath matches non-Element children.
|
|
# doc = Document.new '<a>sean<b/>elliott<c/></a>'
|
|
# doc.root.elements.to_a #-> [ <b/>, <c/> ]
|
|
# doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ]
|
|
# XPath.match(doc.root, "child::node()") #-> [ sean, <b/>, elliott, <c/> ]
|
|
def to_a( xpath=nil )
  matches = XPath.match( @element, xpath )
  if xpath
    # A caller-supplied path may match non-Element nodes; drop those.
    matches.find_all { |node| node.kind_of?( Element ) }
  else
    # No path given: the match result is returned unfiltered (presumably
    # XPath.match's default already yields only Elements -- confirm there).
    matches
  end
end
|
|
|
|
private
|
|
# Private helper method. Removes quotes from quoted strings
|
|
def literalize name
  # Only the first character is inspected. When it is a single or double
  # quote, the first and last characters are both dropped, whatever the
  # last character happens to be.
  return name unless [ ?', ?" ].include?( name[0] )
  name[1..-2]
end
|
|
end
|
|
|
|
########################################################################
|
|
# ATTRIBUTES #
|
|
########################################################################
|
|
|
|
# A class that defines the set of Attributes of an Element and provides
|
|
# operations for accessing elements in that set.
|
|
class Attributes < Hash
  # Storage layout: the underlying Hash is keyed by the attribute name
  # without its prefix. Each value is either a single Attribute, or -- when
  # attributes with different prefixes share that name -- a nested Hash
  # mapping prefix => Attribute.

  # Constructor
  # element:: the Element of which this is an Attribute
  def initialize element
    @element = element
  end

  # Fetches an attribute value.  If you want to get the Attribute itself,
  # use get_attribute()
  # name:: an XPath attribute name.  Namespaces are relevant here.
  # Returns::
  #   the String value of the matching attribute, or +nil+ if no
  #   matching attribute was found.  This is the unnormalized value
  #   (with entities expanded).
  #
  #  doc = Document.new "<a foo:att='1' bar:att='2' att='&lt;'/>"
  #  doc.root.attributes['att']         #-> '<'
  #  doc.root.attributes['bar:att']     #-> '2'
  def [](name)
    attr = get_attribute(name)
    attr.nil? ? nil : attr.value
  end

  # Returns an Array of all the Attribute nodes (not their String values).
  def to_a
    attributes = []
    each_attribute { |attr| attributes << attr }
    attributes
  end

  # Returns the number of attributes the owning Element contains. This
  # counts Attribute nodes, so attributes that share a name under different
  # prefixes are each counted.
  #  doc = Document.new "<a x='1' y='2' foo:x='3'/>"
  #  doc.root.attributes.length        #-> 3
  def length
    count = 0
    each_attribute { count += 1 }
    count
  end
  alias :size :length

  # Iterates over the attributes of an Element.  Yields actual Attribute
  # nodes, not String values. Without a block, returns an Enumerator.
  #
  #  doc = Document.new '<a x="1" y="2"/>'
  #  doc.root.attributes.each_attribute {|attr|
  #    p attr.expanded_name+" => "+attr.value
  #  }
  def each_attribute # :yields: attribute
    return to_enum( :each_attribute ) unless block_given?
    each_value do |val|
      if val.kind_of? Attribute
        yield val
      else
        # Nested prefix => Attribute hash: yield every entry.
        val.each_value { |atr| yield atr }
      end
    end
  end

  # Iterates over each attribute of an Element, yielding the expanded name
  # and value as a pair of Strings. Without a block, returns an Enumerator.
  #
  #  doc = Document.new '<a x="1" y="2"/>'
  #  doc.root.attributes.each {|name, value| p name+" => "+value }
  def each
    return to_enum( :each ) unless block_given?
    each_attribute do |attr|
      yield [attr.expanded_name, attr.value]
    end
  end

  # Fetches an attribute
  # name::
  #   the name by which to search for the attribute.  Can be a
  #   <tt>prefix:name</tt> namespace name.
  # Returns:: The first matching attribute, or nil if there was none.  This
  # value is an Attribute node, not the String value of the attribute.
  #  doc = Document.new '<a x:foo="1" foo="2" bar="3"/>'
  #  doc.root.attributes.get_attribute("foo").value    #-> "2"
  #  doc.root.attributes.get_attribute("x:foo").value  #-> "1"
  def get_attribute( name )
    attr = fetch( name, nil )
    unless attr.nil?
      # Several prefixes share this name: pick the entry stored under the
      # owning element's own prefix.
      attr = attr[ @element.prefix ] if attr.kind_of? Hash
      return attr
    end
    return nil if name.nil?

    # No entry under the full name: split off a prefix and retry with the
    # remainder.
    name =~ Namespace::NAMESPLIT
    prefix, local = $1, $2
    if prefix
      candidate = fetch( local, nil )
      if candidate.kind_of? Attribute
        return candidate if prefix == candidate.prefix
      elsif !candidate.nil?
        return candidate[ prefix ]
      end
    end

    # Last resort: ask the document's doctype for a value declared for
    # this element/attribute pair.
    element_document = @element.document
    if element_document and element_document.doctype
      expn = @element.expanded_name
      expn = element_document.doctype.name if expn.size == 0
      attr_val = element_document.doctype.attribute_of( expn, name )
      return Attribute.new( name, attr_val ) if attr_val
    end
    nil
  end

  # Sets an attribute, overwriting any existing attribute value by the
  # same name.  Namespace is significant.
  # name:: the name of the attribute
  # value::
  #   (optional) If supplied, the value of the attribute.  If
  #   nil, any existing matching attribute is deleted.
  # Returns::
  #   Owning element
  # Raises::
  #   ParseException when the new attribute and an existing one with the
  #   same name have different prefixes that resolve to the same namespace.
  #  doc = Document.new "<a x:foo='1' foo='3'/>"
  #  doc.root.attributes['y:foo'] = '2'
  #  doc.root.attributes['foo'] = '4'
  #  doc.root.attributes['x:foo'] = nil
  def []=( name, value )
    if value.nil?             # Delete the named attribute
      delete get_attribute( name )
      return @element
    end

    unless value.kind_of? Attribute
      element_document = @element.document
      doctype = (element_document and element_document.doctype) ? element_document.doctype : nil
      value = Attribute.new( name, Text::normalize( value, doctype ) )
    end
    value.element = @element

    old_attr = fetch( value.name, nil )
    if old_attr.nil?
      store( value.name, value )
    elsif old_attr.kind_of? Hash
      old_attr[ value.prefix ] = value
    elsif old_attr.prefix != value.prefix
      # Check for conflicting namespaces; "xmlns" prefixes are exempt.
      if value.prefix != "xmlns" && old_attr.prefix != "xmlns"
        old_namespace = @element.namespace( old_attr.prefix )
        new_namespace = @element.namespace( value.prefix )
        if old_namespace == new_namespace
          raise ParseException.new(
            "Namespace conflict in adding attribute \"#{value.name}\": "+
            "Prefix \"#{old_attr.prefix}\" = "+
            "\"#{old_namespace}\" and prefix "+
            "\"#{value.prefix}\" = \"#{new_namespace}\"")
        end
      end
      # Two prefixes now share the name: switch to the nested layout.
      store value.name, { old_attr.prefix => old_attr,
                          value.prefix    => value }
    else
      store value.name, value
    end
    @element
  end

  # Returns an array of Strings containing all of the prefixes declared
  # by this set of attributes.  The array does not include the default
  # namespace declaration, if one exists.
  #  doc = Document.new("<a xmlns='foo' xmlns:x='bar' xmlns:y='twee' "+
  #        "z='glorp' p:k='gru'/>")
  #  prefixes = doc.root.attributes.prefixes    #-> ['x', 'y']
  def prefixes
    ns = []
    each_attribute do |attribute|
      ns << attribute.name if attribute.prefix == 'xmlns'
    end
    doctype_attributes.each do |attribute|
      ns << attribute.name if attribute.prefix == 'xmlns'
    end
    ns
  end

  # Returns a Hash of the namespace declarations found among these
  # attributes and the doctype-declared ones: declared name => value.
  # Unlike #prefixes, an attribute named plain "xmlns" is included, under
  # the key "xmlns".
  def namespaces
    namespaces = {}
    each_attribute do |attribute|
      if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
        namespaces[attribute.name] = attribute.value
      end
    end
    doctype_attributes.each do |attribute|
      if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
        namespaces[attribute.name] = attribute.value
      end
    end
    namespaces
  end

  # Removes an attribute
  # attribute::
  #   either a String, which is the name of the attribute to remove --
  #   namespaces are significant here -- or the attribute to remove.
  #   nil is accepted and removes nothing.
  # Returns:: the owning element
  #  doc = Document.new "<a y:foo='0' x:foo='1' foo='3' z:foo='4'/>"
  #  doc.root.attributes.delete 'foo'   #-> <a y:foo='0' x:foo='1' z:foo='4'/>"
  #  doc.root.attributes.delete 'x:foo' #-> <a y:foo='0' z:foo='4'/>"
  #  attr = doc.root.attributes.get_attribute('y:foo')
  #  doc.root.attributes.delete attr    #-> <a z:foo='4'/>"
  def delete( attribute )
    return @element if attribute.nil?

    if attribute.kind_of? Attribute
      name = attribute.name
      prefix = attribute.prefix
    else
      attribute =~ Namespace::NAMESPLIT
      prefix, name = $1, $2
      prefix = '' unless prefix
    end

    old = fetch( name, nil )
    if old.kind_of? Hash # the supplied attribute is one of many
      old.delete( prefix )
      # One survivor left: collapse the nested hash back to a bare Attribute.
      store name, old.values.first if old.size == 1
    elsif !old.nil?      # the supplied attribute is a top-level one
      # NOTE: the stored attribute is removed by name alone; its prefix is
      # not compared with the requested one.
      super( name )
    end
    @element
  end

  # Adds an attribute, overriding any existing attribute by the
  # same name.  Namespaces are significant.
  # attribute:: An Attribute
  def add( attribute )
    self[attribute.name] = attribute
  end

  alias :<< :add

  # Deletes all attributes matching a name.  Namespaces are significant.
  # name::
  #   A String; all attributes whose expanded name equals this String
  #   will be removed
  # Returns:: an Array of the Attributes that were removed
  def delete_all( name )
    rv = []
    each_attribute do |attribute|
      rv << attribute if attribute.expanded_name == name
    end
    # Removal happens after collection, through each Attribute's own #remove.
    rv.each { |attr| attr.remove }
    rv
  end

  # The +get_attribute_ns+ method retrieves an attribute by its namespace
  # and name. Thus it is possible to reliably identify an attribute
  # even if an XML processor has changed the prefix.
  #
  # Method contributed by Henrik Martensson
  def get_attribute_ns(namespace, name)
    blank_namespace = namespace.nil? || namespace.empty?
    result = nil
    each_attribute do |attribute|
      next unless name == attribute.name && namespace == attribute.namespace()
      # foo will match xmlns:foo, but only if foo isn't also an attribute
      if result.nil? || !blank_namespace ||
         !attribute.fully_expanded_name.index(':')
        result = attribute
      end
    end
    result
  end

  private

  # Attributes the document's doctype declares for the owning element, or
  # an empty Array when the element has no document or no doctype.
  def doctype_attributes
    element_document = @element.document
    return [] unless element_document and element_document.doctype
    expn = @element.expanded_name
    expn = element_document.doctype.name if expn.size == 0
    element_document.doctype.attributes_of( expn )
  end
end
|
|
end
|