Bundle Latest REXML

Sam Ruby has been doing a bang-up job fixing the bugs in REXML.
Who knows when these improvements will trickle down to vendor distributions of Ruby?
In the meantime, let's bundle the latest version of REXML with Instiki.
We check the version number of the bundled REXML against that of the System REXML, and use whichever is later.
This commit is contained in:
Jacques Distler 2008-01-11 23:53:29 -06:00
parent 1085168bbf
commit 38ae064b8a
146 changed files with 11105 additions and 136 deletions

View file

@ -0,0 +1,167 @@
# FUNDAMENTAL FLAWS:
# The order of $: must be preserved. Therefore, there are two sort
# criteria: Versioned files are sorted high; after that, the sort order
# is the order of $:.
# We must preserve the load path; if rexml-2.4 is required in one place,
# all rexml/* packages should be loaded from there.
############################################################################
# #
# This is based on Phil Tomson's #
# ("ptkwt!shell1#aracnet#com".tr("!#","@.")) #
# code. The changes I made are: #
# 1) The Version class is now a member of the Kernel module, to avoid #
# name space conflicts. #
# 2) Version::to_s() returns the original string, not a comma-separated #
# string. #
# 3) The versioning is package based, not file based. In fact, with #
# this, you can't version individual files. AFAIC, this is better, #
# since versioning on individual files is much more tedious than #
# package-based versioning, and it is arguably less useful and less #
# commonly desired. #
# 4) Versions can have arbitrary length. EG: 2.7 < 2.7.1, and "2" #
# matches any version that starts with "2", such as "2.5.2.6.7" #
# #
# The rules are these: #
# 1) All of the locations in $: will be searched #
# 2) The highest version of the package found that satisfies the #
# requirements will be used. #
# 3) If there is no versioned package, or no version matches, we default #
# to the normal Ruby require mechanism. This maintains backward #
# compatible behavior. #
# 4) The packages must be installed as foo-x.y.z. The cardinality of #
# the version is not significant, and packages that do not match this #
# naming pattern match by default. #
# #
# Rule (1) and (2) mean that the highest matching version anywhere in #
# the search path will be used. Rule (3) and (4) mean that even if #
# packages are not installed with this naming convention, programs that #
# use require_version will still work. #
# #
# Usage: #
# To use this, require this module. Then use require_with_ver, instead #
# of require in your files. #
# #
# Examples: #
# require_version('rexml/document'){|v| v > '2.0' and v < '2.5'} #
# require_version('rexml/document'){|v| v > '2.0'} #
# require_version('rexml/document') #
# require_version('rexml/document'){|v| v > '2.0'} #
# require_version('rexml/document'){|v| v >= '1.0' and v < '2.0'} #
# require_version('rexml/document'){|v| v >= '1.0' and v < '2.0' and #
# v != '1.7'} #
# require_version('rexml/document'){|v| (v >= '1.0' and #
# v < '2.0' and #
# v != '1.7') or #
# v == '3.0.1'} #
# require_version('rexml/document'){|v| v.to_s =~ /^2.[02468]/} #
# #
############################################################################
module Kernel
#########################################################
# Version - takes a string in the form: 'X1.X2.X3...Xn' #
# (where 'Xn' is a number) #
#########################################################
class Version
  include Comparable

  # Builds a version from a dotted string.
  #
  # str:: a String such as '2.7.1'; it is split on '.' and every part is
  #       converted with String#to_i (so '2.07' and '2.7' are the same).
  def initialize(str)
    @vs = str.split('.').map { |part| part.to_i }
  end

  # Returns the i-th numeric component, or nil when there is none.
  def [](i)
    @vs[i]
  end

  # Returns the numeric components joined with '.'.
  def to_s
    @vs.join('.')
  end

  # Compares this version with +other+, component by component.
  #
  # other:: a Version, a String (parsed with Version.new) or an Array of
  #         integers.
  #
  # Only the receiver's components are walked, so the comparison is
  # deliberately asymmetric: a receiver that is a prefix of +other+
  # compares as equal (Version.new('2') <=> '2.5' is 0), while a receiver
  # with more components than +other+ compares as greater.
  #
  # Returns -1, 0 or 1; returns nil when +other+ is not comparable (nil, a
  # number, ...) so that Comparable#== answers false instead of raising or
  # indexing into an Integer's bits.
  def <=>(other)
    other = Version.new(other) if other.is_a?(String)
    return nil unless other.is_a?(Version) || other.is_a?(Array)

    @vs.each_with_index do |v, i|
      # The receiver has a component that +other+ lacks: it is the later one.
      return 1 unless other[i]
      return v <=> other[i] unless v == other[i]
    end
    0
  end
end
# Keep the stock Kernel#require reachable under another name before it is
# replaced below.
alias :old_require :require
# Cache: package root name => list of candidate directories found on $:.
# An entry is filled in the first time a given root is requested.
@@__versioned__ = {}
# Replacement for Kernel#require that looks for package directories named
# "root" or "root-<version>" on the load path.
#
# file:: the feature to load, e.g. 'rexml/document'; its first path
#        component is treated as the package root, the remainder as the
#        file inside the package.
# b::    optional block; it is yielded a Version built from the part of a
#        candidate directory name after the '-', and its result takes part
#        in filtering the candidates.
#
# NOTE(review): in its current state this method only prints the
# old_require call it would make (the real call is commented out), so
# nothing is actually loaded.
def require(file,&b)
path = file.split('/')
root = path[0]
rest = path[1..-1].join('/')
# First request for this root: scan every load-path entry and cache the
# matching directories.
unless @@__versioned__[root]
# NOTE(review): `package` is assigned but never read.
package = File.dirname( file )
files = []
$:.each {|dir|
# NOTE(review): File.exists? is deprecated and was removed in Ruby 3.2;
# File.exist? is the replacement.
if File.exists? dir
# Keep only sub-directories named "root", optionally followed by
# "-N" and any number of ".N" groups (single digits, per the regexp).
fileset = Dir.new(dir).entries.delete_if {|f|
fpath = File.join( dir, f )
!(File.directory?(fpath) and f =~ /^#{root}(-\d(\.\d)*)?$/)
}
fileset.collect!{ |f| File.join( dir, f ) }
files += fileset
end
}
if files.size > 0
# Order candidates by directory basename (plain string comparison) and
# reverse, so the lexically greatest basename comes first.
@@__versioned__[root] = files.uniq.sort{|x,y|
File.basename(x) <=> File.basename(y)
}
@@__versioned__[root].reverse!
else
# Nothing found on the load path: fall back to the bare root name.
@@__versioned__[root] = [root]
end
end
# Without a block the first cached candidate is used.
base = @@__versioned__[root][0]
if b #block_given?
p @@__versioned__[root]
# NOTE(review): delete_if mutates the cached list in place, so the
# filtering persists for later calls with the same root. Also, ?: binds
# tighter than `and`, so as written an entry is deleted only when its name
# contains '-', the block accepts its version, and the directory does NOT
# contain rest+".rb" -- confirm this is the intended condition.
base = @@__versioned__[root].delete_if { |f|
l = File.basename(f)
l.include?('-') and yield( Version.new( l.split('-')[1] ) ) and
Dir.new(f).entries.include?( rest+".rb" ) ? false : true
}
p base
base = base[0]
end
#old_require "#{base}/#{rest}"
puts <<-EOL
old_require "#{base}/#{rest}"
EOL
end
end
#=begin
# For testing
# Self-test driver: runs only when this file is executed directly rather
# than loaded by another script.
if $0 == __FILE__
# Add the current directory to the load path that require scans.
$: << "./"
# Each case prints its version constraint, then calls require with a block
# expressing that constraint.
puts "\n\nv > '2.0' and v < '2.5'"
require('rexml/document'){|v| v > '2.0' and v < '2.5'}
puts "\n\nv > '2.0' and v < '3'"
require('rexml/document'){|v| v > '2.0' and v < '3'}
# The remaining cases are disabled by the =begin/=end block comment below.
=begin
puts "\n\nv > '2.0'"
require('rexml/document'){|v| v > '2.0'}
require('rexml/document')
puts "\n\nv > '2.0'"
require('rexml/document'){|v| v > '2.0'}
puts "\n\nv >= '1.0' and v < '2.0'"
require('rexml/document'){|v| v >= '1.0' and v < '2.0'}
puts "\n\nv >= '1.0' and v < '2.0' and v != '1.7'"
require('rexml/document'){|v| v >= '1.0' and v < '2.0' and v != '1.7'}
require('rexml/document'){|v| (v >= '1.0' and
v < '2.0' and
v != '1.7') or
v == '3.0.1'}
puts "\n\nv.to_s =~ /^2.[02468]/"
require('rexml/document'){|v| v.to_s =~ /^2.[02468]/}
require('rexml/parsers/baseparser' )
=end
end
#=end

View file

@ -0,0 +1,62 @@
#vim:ts=2 sw=2 noexpandtab:
require 'rexml/child'
require 'rexml/source'
module REXML
  # This class needs:
  # * Documentation
  # * Work!  Not all types of attlists are intelligently parsed, so we just
  #   spew back out what we get in.  This works, but it would be better if
  #   we formatted the output ourselves.
  #
  # AttlistDecls provide *just* enough support to allow namespace
  # declarations.  If you need some sort of generalized support, or have an
  # interesting idea about how to map the hideous, terrible design of DTD
  # AttlistDecls onto an intuitive Ruby interface, let me know.  I'm desperate
  # for anything to make DTDs more palatable.
  class AttlistDecl < Child
    include Enumerable

    # First item of the array handed to the constructor -- presumably the
    # name of the element the attribute list is declared for (TODO confirm
    # against the parser that builds the array).
    attr_reader :element_name

    # Create an AttlistDecl from pre-parsed data.
    #
    # source::
    #   An Array of [element_name, pairs, contents], where +pairs+ is a
    #   Hash-like object of attribute name => value and +contents+ is the
    #   raw declaration text echoed back by #write.  Anything that is not
    #   an Array is ignored, leaving the object empty.
    #
    # Notice that this isn't very convenient; to create an AttlistDecl, you
    # basically have to format it yourself.  Sorry, but for the foreseeable
    # future, DTD support in REXML is pretty weak on convenience.
    def initialize(source)
      super()
      if source.kind_of? Array
        @element_name, @pairs, @contents = *source
      end
    end

    # Access the attlist attribute/value pairs.
    #  value = attlist_decl[ attribute_name ]
    def [](key)
      @pairs[key]
    end

    # Whether an attlist declaration includes the given attribute definition
    #  if attlist_decl.include? "xmlns:foobar"
    def include?(key)
      # Direct key lookup instead of building the full key list first.
      @pairs.key?(key)
    end

    # Iterate over the key/value pairs:
    #  attlist_decl.each { |attribute_name, attribute_value| ... }
    def each(&block)
      @pairs.each(&block)
    end

    # Write out exactly what we got in.  +indent+ is accepted for API
    # compatibility and ignored.
    def write out, indent=-1
      out << @contents
    end

    def node_type
      :attlistdecl
    end
  end
end

View file

@ -0,0 +1,188 @@
require "rexml/namespace"
require 'rexml/text'
module REXML
  # Defines an Element Attribute; IE, a attribute=value pair, as in:
  # <element attribute="value"/>.  Attributes can be in their own
  # namespaces.  General users of REXML will not interact with the
  # Attribute class much.
  class Attribute
    include Node
    include Namespace

    # The element to which this attribute belongs
    attr_reader :element
    # The normalized value of this attribute.  That is, the attribute with
    # entities intact.
    attr_writer :normalized
    PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um

    NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um

    # Constructor.
    # FIXME: The parser doesn't catch illegal characters in attributes
    #
    # first::
    #   Either: an Attribute, which this new attribute will become a
    #   clone of; or a String, which is the name of this attribute
    # second::
    #   If +first+ is an Attribute, then this may be an Element, or nil.
    #   If nil, then the Element parent of this attribute is the parent
    #   of the +first+ Attribute.  If the first argument is a String,
    #   then this must also be a String, and is the content of the attribute.
    #   If this is the content, it must be fully normalized (contain no
    #   illegal characters).
    # parent::
    #   Ignored unless +first+ is a String; otherwise, may be the Element
    #   parent of this attribute, or nil.
    #
    # Raises a RuntimeError when +first+ is neither an Attribute nor a
    # String.
    #
    #  Attribute.new( attribute_to_clone )
    #  Attribute.new( attribute_to_clone, parent_element )
    #  Attribute.new( "attr", "attr_value" )
    #  Attribute.new( "attr", "attr_value", parent_element )
    def initialize( first, second=nil, parent=nil )
      @normalized = @unnormalized = @element = nil
      if first.kind_of? Attribute
        self.name = first.expanded_name
        @unnormalized = first.value
        if second.kind_of? Element
          @element = second
        else
          @element = first.element
        end
      elsif first.kind_of? String
        @element = parent
        self.name = first
        @normalized = second.to_s
      else
        raise "illegal argument #{first.class.name} to Attribute constructor"
      end
    end

    # Returns the namespace prefix of the attribute.  When the attribute
    # itself has no prefix, the prefix of the owning element (if any) is
    # used instead.
    #
    #  e = Element.new( "elns:myelement" )
    #  e.add_attribute( "nsa:a", "aval" )
    #  e.add_attribute( "b", "bval" )
    #  e.attributes.get_attribute( "a" ).prefix   # -> "nsa"
    #  e.attributes.get_attribute( "b" ).prefix   # -> "elns"
    #  a = Attribute.new( "x", "y" )
    #  a.prefix                                   # -> ""
    def prefix
      pf = super
      if pf == ""
        pf = @element.prefix if @element
      end
      pf
    end

    # Returns the namespace URL, if defined, or nil otherwise.  The lookup
    # is delegated to the owning element, so the attribute must belong to
    # one.
    #
    #  e = Element.new("el")
    #  e.add_attributes({"xmlns:ns" => "http://url"})
    #  e.namespace( "ns" )              # -> "http://url"
    def namespace arg=nil
      arg = prefix if arg.nil?
      @element.namespace arg
    end

    # Returns true if other is an Attribute and has the same name and value,
    # false otherwise.
    def ==( other )
      other.kind_of?(Attribute) and other.name==name and other.value==value
    end

    # Creates (and returns) a hash from both the name and value
    def hash
      name.hash + value.hash
    end

    # Returns this attribute out as XML source, expanding the name.
    #
    # The value is wrapped in double quotes when the element's context has
    # :attribute_quote set to :quote, and in single quotes otherwise; any
    # occurrence of the chosen quote character inside the value is replaced
    # by the matching predefined entity (&quot; or &apos;).
    #
    #  a = Attribute.new( "x", "y" )
    #  a.to_string     # -> "x='y'"
    #  b = Attribute.new( "ns:x", "y" )
    #  b.to_string     # -> "ns:x='y'"
    def to_string
      if @element and @element.context and @element.context[:attribute_quote] == :quote
        # XML's predefined entity for '"' is &quot; ("&quote;" is undefined
        # and would make the output not well-formed).
        %Q^#@expanded_name="#{to_s().gsub(/"/, '&quot;')}"^
      else
        "#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'"
      end
    end

    # Returns the doctype of the document the owning element belongs to, or
    # nil when there is no element or no document.
    def doctype
      return nil unless @element
      doc = @element.document
      doc.doctype if doc
    end

    # Returns the normalized attribute value.  If only the unnormalized form
    # is held, it is converted with Text::normalize, cached, and the
    # unnormalized form is dropped.
    def to_s
      return @normalized if @normalized

      @normalized = Text::normalize( @unnormalized, doctype )
      @unnormalized = nil
      @normalized
    end

    # Returns the UNNORMALIZED value of this attribute.  If only the
    # normalized form is held, it is converted with Text::unnormalize,
    # cached, and the normalized form is dropped.
    def value
      return @unnormalized if @unnormalized
      @unnormalized = Text::unnormalize( @normalized, doctype )
      @normalized = nil
      @unnormalized
    end

    # Returns a copy of this attribute
    def clone
      Attribute.new self
    end

    # Sets the element of which this object is an attribute.  Normally, this
    # is not directly called.  When a normalized value is held it is
    # re-checked with Text.check now that a doctype may be reachable.
    #
    # Returns this attribute
    def element=( element )
      @element = element

      if @normalized
        Text.check( @normalized, NEEDS_A_SECOND_CHECK, doctype )
      end

      self
    end

    # Removes this Attribute from its element's attributes.  Does nothing
    # when the attribute has no element.
    #
    # This method is usually not called directly.
    def remove
      @element.attributes.delete self.name unless @element.nil?
    end

    # Writes this attribute (EG, puts 'key="value"' to the output).
    # +indent+ is accepted for API compatibility and ignored.
    def write( output, indent=-1 )
      output << to_string
    end

    def node_type
      :attribute
    end

    # Returns the same text #write produces.
    def inspect
      rv = ""
      write( rv )
      rv
    end

    # Returns the owning element's xpath followed by "/@" and this
    # attribute's expanded name.
    def xpath
      path = @element.xpath
      path += "/@#{self.expanded_name}"
      return path
    end
  end
end
#vim:ts=2 sw=2 noexpandtab:

67
vendor/plugins/rexml/lib/rexml/cdata.rb vendored Normal file
View file

@ -0,0 +1,67 @@
require "rexml/text"
module REXML
  # Text that is written between <![CDATA[ and ]]> markers.
  class CData < Text
    START = '<![CDATA['
    STOP = ']]>'
    ILLEGAL = /(\]\]>)/

    # Builds a CData node.  All work is delegated to Text#initialize, which
    # additionally receives the fixed arguments false, true and ILLEGAL.
    #
    # _Examples_
    #  CData.new( source )
    #  CData.new( "Here is some CDATA" )
    #  CData.new( "Some unprocessed data", respect_whitespace_TF, parent_element )
    def initialize( first, whitespace=true, parent=nil )
      super( first, whitespace, parent, false, true, ILLEGAL )
    end

    # Returns a new CData built from this one.
    #
    # _Examples_
    #  c = CData.new( "Some text" )
    #  d = c.clone
    #  d.to_s        # -> "Some text"
    def clone
      CData.new( self )
    end

    # The text held by this node, without the CDATA markers.
    #
    # _Examples_
    #  c = CData.new( "Some text" )
    #  c.to_s        # -> "Some text"
    def to_s
      @string
    end

    # Same text as #to_s.
    def value
      @string
    end

    # == DEPRECATED
    # See the rexml/formatters package
    #
    # Emits a deprecation warning, then writes the start marker, the text
    # and the stop marker to +output+.
    #
    # output::
    #   Where to write the string.  Defaults to $stdout
    # indent::
    #   The amount to indent this node by
    # transitive::
    #   Ignored
    # ie_hack::
    #   Ignored
    #
    # _Examples_
    #  c = CData.new( " Some text " )
    #  c.write( $stdout )     #->  <![CDATA[ Some text ]]>
    def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
      Kernel.warn( "#{self.class.name}.write is deprecated" )
      indent( output, indent )
      [START, @string].each { |piece| output << piece }
      output << STOP
    end
  end
end

96
vendor/plugins/rexml/lib/rexml/child.rb vendored Normal file
View file

@ -0,0 +1,96 @@
require "rexml/node"
module REXML
  ##
  # Base class for anything that lives inside a parent node.  It supplies
  # the parent bookkeeping shared by such nodes; user code rarely touches
  # it directly.
  class Child
    include Node
    # The Parent this object currently belongs to (nil when detached)
    attr_reader :parent

    # Subclasses should call super so that the parent link is initialised.
    #
    # parent::
    #   optional; when given, +self+ is handed to parent.add, which is
    #   expected to establish the link
    def initialize( parent = nil )
      # Start detached; parent.add below is responsible for setting the
      # real value.
      @parent = nil
      parent.add( self ) if parent
    end

    # Asks the parent to put +child+ in this object's place
    # (Parent#replace_child).
    #
    # Returns:: self
    def replace_with( child )
      @parent.replace_child( self, child )
      self
    end

    # Detaches this child from its parent, if it has one.
    #
    # Returns:: self
    def remove
      @parent.delete( self ) unless @parent.nil?
      self
    end

    # Re-parents this child.
    #
    # other::
    #   The new parent.  If it equals the current parent nothing happens.
    #   Otherwise this child is deleted from the current parent (if one
    #   exists) and +other+ is recorded as the parent.
    # Returns:: The parent now in effect
    def parent=( other )
      unless @parent == other
        @parent.delete( self ) if (defined? @parent) && @parent
        @parent = other
      end
      @parent
    end

    alias :next_sibling :next_sibling_node
    alias :previous_sibling :previous_sibling_node

    # Inserts +other+ directly after this child in the parent.
    #  a = Element.new("a")
    #  b = a.add_element("b")
    #  c = Element.new("c")
    #  b.next_sibling = c
    #  # => <a><b/><c/></a>
    def next_sibling=( other )
      parent.insert_after( self, other )
    end

    # Inserts +other+ directly before this child in the parent.
    #  a = Element.new("a")
    #  b = a.add_element("b")
    #  c = Element.new("c")
    #  b.previous_sibling = c
    #  # => <a><c/><b/></a>
    def previous_sibling=(other)
      parent.insert_before( self, other )
    end

    # Returns:: the document this child belongs to, or nil if this child
    # has no parent
    def document
      parent.nil? ? nil : parent.document
    end

    # This doesn't yet handle encodings: the document's encoding is looked
    # up but the result is not used, and #to_s is returned unchanged.
    def bytes
      document.encoding
      to_s
    end
  end
end

View file

@ -0,0 +1,80 @@
require "rexml/child"
module REXML
  ##
  # Represents an XML comment; that is, text between \<!-- ... -->
  class Comment < Child
    include Comparable
    START = "<!--"
    STOP = "-->"

    # The content text
    attr_accessor :string

    ##
    # Constructor.
    # @param first If String, the contents of this comment are set to the
    # argument.  If Comment, the argument's contents are used.  Any other
    # kind of argument leaves the contents unset.
    # @param second Optional parent; it is passed to Child#initialize, which
    # adds this comment to it.
    def initialize( first, second = nil )
      super(second)
      if first.kind_of? String
        @string = first
      elsif first.kind_of? Comment
        @string = first.string
      end
    end

    # Returns a new Comment with the same contents and no parent.
    def clone
      Comment.new self
    end

    # == DEPRECATED
    # See REXML::Formatters
    #
    # output::
    #   Where to write the string
    # indent::
    #   An integer.  If -1, no indenting will be used; otherwise, the
    #   indentation will be this number of spaces, and children will be
    #   indented an additional amount.
    # transitive::
    #   Ignored by this class.  The contents of comments are never modified.
    # ie_hack::
    #   Needed for conformity to the child API, but not used by this class.
    def write( output, indent=-1, transitive=false, ie_hack=false )
      Kernel.warn("Comment.write is deprecated.  See REXML::Formatters")
      indent( output, indent )
      output << START
      output << @string
      output << STOP
    end

    alias :to_s :string

    ##
    # Compares this Comment to another; the contents of the comment are used
    # in the comparison.  The receiver is the left-hand operand, as the
    # Comparable contract requires, so a comment whose text sorts first is
    # the smaller one.
    def <=>(other)
      @string <=> other.to_s
    end

    ##
    # Two comments are equal when +other+ is a Comment with the same
    # contents.
    def ==( other )
      other.kind_of?(Comment) and
      (other <=> self) == 0
    end

    def node_type
      :comment
    end
  end
end
#vim:ts=2 sw=2 noexpandtab:

View file

@ -0,0 +1,270 @@
require "rexml/parent"
require "rexml/parseexception"
require "rexml/namespace"
require 'rexml/entity'
require 'rexml/attlistdecl'
require 'rexml/xmltokens'
module REXML
# Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
# ... >. DOCTYPES can be used to declare the DTD of a document, as well as
# being used to declare entities used in the document.
class DocType < Parent
  include XMLTokens
  START = "<!DOCTYPE"
  STOP = ">"
  SYSTEM = "SYSTEM"
  PUBLIC = "PUBLIC"
  DEFAULT_ENTITIES = {
    'gt'=>EntityConst::GT,
    'lt'=>EntityConst::LT,
    'quot'=>EntityConst::QUOT,
    "apos"=>EntityConst::APOS
  }

  # name is the name of the doctype
  # external_id is the referenced DTD, if given
  attr_reader :name, :external_id, :entities, :namespaces

  # Constructor
  #
  #   dt = DocType.new( 'foo', '-//I/Hate/External/IDs' )
  #   # <!DOCTYPE foo '-//I/Hate/External/IDs'>
  #   dt = DocType.new( doctype_to_clone )
  #   # Incomplete.  Shallow clone of doctype
  #
  # An Array of [name, external_id, long_name, uri] is accepted as well.
  # When +first+ is a String the second argument is taken as the external
  # id; in every other case it is the parent node.
  #
  # +Note+ that the constructor:
  #
  #  Doctype.new( Source.new( "<!DOCTYPE foo 'bar'>" ) )
  #
  # is _deprecated_.  Do not use it.  It will probably disappear.
  def initialize( first, parent=nil )
    # The default table is shared until #add needs to change it.
    @entities = DEFAULT_ENTITIES
    @long_name = @uri = nil
    if first.kind_of? String
      super()
      @name = first
      @external_id = parent
    elsif first.kind_of? DocType
      super( parent )
      @name = first.name
      @external_id = first.external_id
    elsif first.kind_of? Array
      super( parent )
      @name = first[0]
      @external_id = first[1]
      @long_name = first[2]
      @uri = first[3]
    elsif first.kind_of? Source
      super( parent )
      parser = Parsers::BaseParser.new( first )
      event = parser.pull
      if event[0] == :start_doctype
        @name, @external_id, @long_name, @uri, = event[1..-1]
      end
    else
      super()
    end
  end

  def node_type
    :doctype
  end

  # Returns an Array of Attribute objects built from every AttlistDecl
  # child declared for +element+.
  def attributes_of element
    rv = []
    each do |child|
      child.each do |key,val|
        rv << Attribute.new(key,val)
      end if child.kind_of? AttlistDecl and child.element_name == element
    end
    rv
  end

  # Returns the declared value of +attribute+ for +element+, taken from the
  # first AttlistDecl child that declares it, or nil when there is none.
  def attribute_of element, attribute
    att_decl = find do |child|
      child.kind_of? AttlistDecl and
      child.element_name == element and
      child.include? attribute
    end
    return nil unless att_decl
    att_decl[attribute]
  end

  def clone
    DocType.new self
  end

  # output::
  #   Where to write the string
  # indent::
  #   An integer.  If -1, no indentation will be used; otherwise, the
  #   indentation will be this number of spaces, and children will be
  #   indented an additional amount.
  # transitive::
  #   Ignored
  # ie_hack::
  #   Ignored
  def write( output, indent=0, transitive=false, ie_hack=false )
    f = REXML::Formatters::Default.new
    indent( output, indent )
    output << START
    output << ' '
    output << @name
    output << " #@external_id" if @external_id
    output << " #{@long_name.inspect}" if @long_name
    output << " #{@uri.inspect}" if @uri
    unless @children.empty?
      # Children form the internal subset, one per line inside [ ... ].
      output << ' ['
      @children.each { |child|
        output << "\n"
        f.write( child, output )
      }
      output << "\n]"
    end
    output << STOP
  end

  # The context of the parent node.
  def context
    @parent.context
  end

  # Returns the unnormalized value of the entity called +name+, or nil when
  # no such entity is known.
  def entity( name )
    @entities[name].unnormalized if @entities[name]
  end

  # Adds +child+ to the doctype; Entity children are also registered in the
  # entity table under their name.
  def add child
    super(child)
    # Copy the shared default table before the first modification.
    @entities = DEFAULT_ENTITIES.clone if @entities == DEFAULT_ENTITIES
    @entities[ child.name ] = child if child.kind_of? Entity
  end

  # This method retrieves the public identifier identifying the document's
  # DTD.
  #
  # Method contributed by Henrik Martensson
  def public
    case @external_id
    when "SYSTEM"
      nil
    when "PUBLIC"
      strip_quotes(@long_name)
    end
  end

  # This method retrieves the system identifier identifying the document's DTD
  #
  # Method contributed by Henrik Martensson
  def system
    case @external_id
    when "SYSTEM"
      strip_quotes(@long_name)
    when "PUBLIC"
      @uri.kind_of?(String) ? strip_quotes(@uri) : nil
    end
  end

  # This method returns a list of notations that have been declared in the
  # _internal_ DTD subset. Notations in the external DTD subset are not
  # listed.
  #
  # Method contributed by Henrik Martensson
  def notations
    children().select {|node| node.kind_of?(REXML::NotationDecl)}
  end

  # Retrieves a named notation. Only notations declared in the internal
  # DTD subset can be retrieved.
  #
  # Method contributed by Henrik Martensson
  def notation(name)
    notations.find { |notation_decl|
      notation_decl.name == name
    }
  end

  private

  # Returns +quoted_string+ without its first and last character when it
  # starts and ends with a quote character (' or "); otherwise returns it
  # unchanged.
  #
  # Method contributed by Henrik Martensson
  def strip_quotes(quoted_string)
    # The closing class must list the ASCII apostrophe; an acute accent
    # there would leave single-quoted identifiers unstripped.
    quoted_string =~ /^[\'\"].*[\'\"]$/ ?
      quoted_string[1, quoted_string.length-2] :
      quoted_string
  end
end
# We don't really handle any of these since we're not a validating
# parser, so we can be pretty dumb about them. All we need to be able
# to do is spew them back out on a write()
# This is an abstract class. You never use this directly; it serves as a
# parent class for the specific declarations.
class Declaration < Child
  # src:: the declaration text, stored without its closing '>'
  def initialize(src)
    super()
    @string = src
  end

  # The stored text followed by the closing '>'.
  def to_s
    @string + '>'
  end

  # == DEPRECATED
  # See REXML::Formatters
  #
  # Appends #to_s to +output+; +indent+ is not used.
  def write(output, indent)
    output << to_s
  end
end
public
# Declaration subclass for element declarations; it adds nothing beyond
# what Declaration provides.
class ElementDecl < Declaration
  # src:: passed straight through to Declaration#initialize
  def initialize(src)
    super(src)
  end
end
# Holds an external entity as a piece of text that is echoed back
# unchanged.
class ExternalEntity < Child
  # src:: the text of the entity
  def initialize(src)
    super()
    @entity = src
  end

  # The text given to the constructor.
  def to_s
    @entity
  end

  # Appends the stored text to +output+; +indent+ is not used.
  def write(output, indent)
    output << @entity
  end
end
# A <!NOTATION ...> declaration.
class NotationDecl < Child
  attr_accessor :public, :system

  # This method retrieves the name of the notation.
  #
  # Method contributed by Henrik Martensson
  attr_reader :name

  # name::   the notation name
  # middle:: text written directly after the name
  # pub::    public identifier, or nil
  # sys::    system identifier, or nil
  def initialize(name, middle, pub, sys)
    super(nil)
    @name = name
    @middle = middle
    @public = pub
    @system = sys
  end

  # Rebuilds the declaration text.  The public and system identifiers are
  # appended (each preceded by a space, in #inspect form) only when set.
  def to_s
    text = "<!NOTATION #@name #@middle"
    text << ' ' << @public.inspect if @public
    text << ' ' << @system.inspect if @system
    text << '>'
  end

  # Appends #to_s to +output+; +indent+ is not used.
  def write(output, indent=-1)
    output << to_s
  end
end
end

View file

@ -0,0 +1,208 @@
require "rexml/element"
require "rexml/xmldecl"
require "rexml/source"
require "rexml/comment"
require "rexml/doctype"
require "rexml/instruction"
require "rexml/rexml"
require "rexml/parseexception"
require "rexml/output"
require "rexml/parsers/baseparser"
require "rexml/parsers/streamparser"
require "rexml/parsers/treeparser"
module REXML
  # Represents a full XML document, including PIs, a doctype, etc.  A
  # Document has a single child that can be accessed by root().
  # Note that if you want to have an XML declaration written for a document
  # you create, you must add one; REXML documents do not write a default
  # declaration for you.  See |DECLARATION| and |write|.
  class Document < Element
    # A convenient default XML declaration.  If you want an XML declaration,
    # the easiest way to add one is mydoc << Document::DECLARATION
    # +DEPRECATED+
    # Use: mydoc << XMLDecl.default
    DECLARATION = XMLDecl.default

    # Constructor
    # @param source if supplied, must be a Document, String, or IO.
    # Documents have their context and Element attributes cloned.
    # Strings are expected to be valid XML documents.  IOs are expected
    # to be sources of valid XML documents.
    # @param context if supplied, contains the context of the document;
    # this should be a Hash.
    def initialize( source = nil, context = {} )
      super()
      @context = context
      return if source.nil?
      if source.kind_of? Document
        @context = source.context
        super source
      else
        build( source )
      end
    end

    def node_type
      :document
    end

    # Should be obvious
    def clone
      Document.new self
    end

    # According to the XML spec, a root node has no expanded name
    def expanded_name
      ''
      #d = doc_type
      #d ? d.name : "UNDEFINED"
    end

    alias :name :expanded_name

    # We override this, because XMLDecls and DocTypes must go at the start
    # of the document.
    #
    # * An XMLDecl is placed first.
    # * A DocType replaces the first child when that child is a DocType, is
    #   inserted right before the first Element otherwise, and is appended
    #   when the document has neither.
    # * Anything else is added normally; a RuntimeError is raised if that
    #   leaves the document with more than one root element.
    def add( child )
      if child.kind_of? XMLDecl
        @children.unshift child
        child.parent = self
      elsif child.kind_of? DocType
        # Position of the first Element or DocType child, or nil when there
        # is no such child.
        insert_before_index = @children.index { |x|
          x.kind_of?(Element) || x.kind_of?(DocType)
        }
        if insert_before_index.nil?
          # Nothing to go in front of: insert at end of list
          @children << child
        elsif @children[ insert_before_index ].kind_of? DocType
          @children[ insert_before_index ] = child
        else
          @children.insert( insert_before_index, child )
        end
        child.parent = self
      else
        rv = super
        raise "attempted adding second root element to document" if @elements.size > 1
        rv
      end
    end
    alias :<< :add

    # Adds an element as Element#add_element does, but raises a
    # RuntimeError if that leaves the document with more than one root
    # element.
    def add_element(arg=nil, arg2=nil)
      rv = super
      raise "attempted adding second root element to document" if @elements.size > 1
      rv
    end

    # @return the root Element of the document, or nil if this document
    # has no children.
    def root
      elements[1]
      #self
      #@children.find { |item| item.kind_of? Element }
    end

    # @return the DocType child of the document, if one exists,
    # and nil otherwise.
    def doctype
      @children.find { |item| item.kind_of? DocType }
    end

    # @return the XMLDecl of this document; if no XMLDecl has been
    # set, the default declaration is returned.  Note that in the latter
    # case the default declaration is also inserted as the first child.
    def xml_decl
      rv = @children[0]
      return rv if rv.kind_of? XMLDecl
      rv = @children.unshift(XMLDecl.default)[0]
    end

    # @return the XMLDecl version of this document as a String.
    # If no XMLDecl has been set, returns the default version.
    def version
      xml_decl().version
    end

    # @return the XMLDecl encoding of this document as a String.
    # If no XMLDecl has been set, returns the default encoding.
    def encoding
      xml_decl().encoding
    end

    # @return the XMLDecl standalone value of this document as a String.
    # If no XMLDecl has been set, returns the default setting.
    def stand_alone?
      xml_decl().stand_alone?
    end

    # Write the XML tree out, optionally with indent.  This writes out the
    # entire XML document, including XML declarations, doctype declarations,
    # and processing instructions (if any are given).
    #
    # A controversial point is whether Document should always write the XML
    # declaration (<?xml version='1.0'?>) whether or not one is given by the
    # user (or source document).  REXML does not write one if one was not
    # specified, because it adds unnecessary bandwidth to applications such
    # as XML-RPC.
    #
    # See also the classes in the rexml/formatters package for the proper way
    # to change the default formatting of XML output
    #
    # _Examples_
    #   Document.new("<a><b/></a>").write
    #
    #   output_string = ""
    #   Document.new("<a><b/></a>").write( output_string )
    #
    # output::
    #   output an object which supports '<< string'; this is where the
    #   document will be written.  Unless the document's encoding is UTF-8
    #   or +output+ already is an Output, it is wrapped in an Output for
    #   that encoding.
    # indent::
    #   An integer.  If -1, no indenting will be used; otherwise, the
    #   indentation will be twice this number of spaces, and children will be
    #   indented an additional amount.  For a value of 3, every item will be
    #   indented 3 more levels, or 6 more spaces (2 * 3).  Defaults to -1
    # transitive::
    #   If transitive is true and indent is >= 0, then the output will be
    #   pretty-printed in such a way that the added whitespace does not affect
    #   the absolute *value* of the document -- that is, it leaves the value
    #   and number of Text nodes in the document unchanged.
    # ie_hack::
    #   Internet Explorer is the worst piece of crap to have ever been
    #   written, with the possible exception of Windows itself.  Since IE is
    #   unable to parse proper XML, we have to provide a hack to generate XML
    #   that IE's limited abilities can handle.  This hack inserts a space
    #   before the /> on empty tags.  Defaults to false
    def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
      if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
        output = Output.new( output, xml_decl.encoding )
      end
      formatter = if indent > -1
          if transitive
            REXML::Formatters::Transitive.new( indent, ie_hack )
          else
            REXML::Formatters::Pretty.new( indent, ie_hack )
          end
        else
          REXML::Formatters::Default.new( ie_hack )
        end
      formatter.write( self, output )
    end

    # Parses +source+ with a StreamParser, reporting events to +listener+.
    def Document::parse_stream( source, listener )
      Parsers::StreamParser.new( source, listener ).parse
    end

    private

    # Populates this document by running a TreeParser over +source+.
    def build( source )
      Parsers::TreeParser.new( source, self ).parse
    end
  end
end

View file

@ -0,0 +1,10 @@
require "rexml/child"
module REXML
module DTD
# Patterns for recognising an <!ATTLIST ...> declaration in DTD text.
# The class defines no behaviour of its own beyond what Child provides.
class AttlistDecl < Child
# Literal text that opens the declaration.
START = "<!ATTLIST"
# Matches the opening text at the start of a line, after optional
# whitespace.
START_RE = /^\s*#{START}/um
# Captures the whole declaration, from the opening text up to and
# including the first following '>'.
PATTERN_RE = /\s*(#{START}.*?>)/um
end
end
end

View file

@ -0,0 +1,51 @@
require "rexml/dtd/elementdecl"
require "rexml/dtd/entitydecl"
require "rexml/comment"
require "rexml/dtd/notationdecl"
require "rexml/dtd/attlistdecl"
require "rexml/parent"
module REXML
module DTD
# Turns DTD text into a Parent holding one declaration object per
# recognised declaration.
#
# NOTE(review): as written this code does not look runnable -- see the
# notes inside parse_helper; confirm whether it is used anywhere.
class Parser
# Parses +input+, which may be a String or a File (read in full).
# Any other kind of input falls through the case and yields nil.
def Parser.parse( input )
case input
when String
parse_helper input
when File
parse_helper input.read
end
end
# Takes a String and parses it out
#
# NOTE(review): each branch stores the unmatched remainder ($') in
# +source+, but the loop condition tests +input+, which is never
# reassigned -- the loop does not appear to make progress.
# NOTE(review): the patterns are referenced as Class.PATTERN_RE, which is
# a method call; constants are normally reached with Class::PATTERN_RE.
# NOTE(review): no PATTERN_RE constant is visible on Comment, or in the
# DTD::EntityDecl and DTD::NotationDecl sources -- verify against those
# files.
def Parser.parse_helper( input )
contents = Parent.new
while input.size > 0
case input
when ElementDecl.PATTERN_RE
# Element declarations receive the matched text ($&); the other
# branches pass the MatchData ($~).
match = $&
source = $'
contents << ElementDecl.new( match )
when AttlistDecl.PATTERN_RE
matchdata = $~
source = $'
contents << AttlistDecl.new( matchdata )
when EntityDecl.PATTERN_RE
matchdata = $~
source = $'
contents << EntityDecl.new( matchdata )
when Comment.PATTERN_RE
matchdata = $~
source = $'
contents << Comment.new( matchdata )
when NotationDecl.PATTERN_RE
matchdata = $~
source = $'
contents << NotationDecl.new( matchdata )
end
end
contents
end
end
end
end

View file

@ -0,0 +1,17 @@
require "rexml/child"
module REXML
  module DTD
    # An <!ELEMENT ...> declaration: the element name plus the rest of the
    # declaration text.
    class ElementDecl < Child
      # Literal text that opens the declaration.
      START = "<!ELEMENT"
      # Matches the opening text at the start of a line, after optional
      # whitespace.
      START_RE = /^\s*#{START}/um
      # Captures the (optionally prefixed) element name and everything
      # between it and the closing '>'.  A looser pattern used to be
      # assigned to this constant first and was immediately overwritten by
      # this one; only the effective definition is kept.
      PATTERN_RE = /^\s*#{START}\s+((?:[:\w_][-\.\w_]*:)?[-!\*\.\w_]*)(.*?)>/
      #\s*((((["']).*?\5)|[^\/'">]*)*?)(\/)?>/um, true)

      # match:: an object indexed as match[1] (element name) and match[2]
      #         (rest of the declaration), e.g. the MatchData produced by
      #         PATTERN_RE
      def initialize match
        # Let Child set up the parent link, as the sibling declaration
        # classes do.
        super()
        @name = match[1]
        @rest = match[2]
      end
    end
  end
end

View file

@ -0,0 +1,56 @@
require "rexml/child"
module REXML
  module DTD
    # An <!ENTITY ...> declaration read from a source.
    class EntityDecl < Child
      START = "<!ENTITY"
      START_RE = /^\s*#{START}/um
      PUBLIC = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+PUBLIC\s+((["']).*?\3)\s+((["']).*?\5)\s*>/um
      SYSTEM = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+SYSTEM\s+((["']).*?\3)(?:\s+NDATA\s+\w+)?\s*>/um
      PLAIN = /^\s*#{START}\s+(\w+)\s+((["']).*?\3)\s*>/um
      PERCENT = /^\s*#{START}\s+%\s+(\w+)\s+((["']).*?\3)\s*>/um

      # <!ENTITY name SYSTEM "...">
      # <!ENTITY name "...">
      #
      # src:: an object whose match(pattern) tests for a declaration and
      #       whose match(pattern, true) returns the match data.  The
      #       forms are tried in the order PUBLIC, SYSTEM, PLAIN, PERCENT.
      #
      # Raises ParseException when none of the forms matches.
      def initialize(src)
        super()
        # Each form paired with the keyword written between name and content.
        forms = [[PUBLIC, "PUBLIC"], [SYSTEM, "SYSTEM"], [PLAIN, ""], [PERCENT, ""]]
        pattern, keyword = forms.find { |re, _| src.match( re ) }
        raise ParseException.new("failed Entity match", src) if pattern.nil?

        md = src.match( pattern, true )
        @middle = keyword
        # Only the PUBLIC form carries two quoted identifiers.
        @content = pattern.equal?(PUBLIC) ? "#{md[2]} #{md[4]}" : md[2]
        @name = md[1]
      end

      # Rebuilds the declaration text from name, keyword (when present) and
      # content.
      def to_s
        text = "<!ENTITY #@name "
        text << "#@middle " unless @middle.empty?
        text << @content
      end

      # Writes the indentation followed by #to_s to +output+.
      def write(output, indent)
        indent( output, indent )
        output << to_s
      end

      # Takes a PATTERN_RE match from +source+, squeezes runs of whitespace
      # in the matched text, and sends it to +listener+ under the
      # downcased result of inspect.
      #
      # NOTE(review): PATTERN_RE is not defined in this class in the
      # visible source -- confirm where it is expected to come from.
      def EntityDecl.parse_source(source, listener)
        found = source.match( PATTERN_RE, true )
        squeezed = found[0].squeeze(" \t\n\r")
        listener.send( inspect.downcase, squeezed )
      end
    end
  end
end

View file

@ -0,0 +1,39 @@
require "rexml/child"
module REXML
  module DTD
    # DTD-level node for a <!NOTATION ...> declaration.
    class NotationDecl < Child
      START = "<!NOTATION"
      START_RE = /^\s*#{START}/um
      # Capture 1: notation name, 2: PUBLIC/SYSTEM keyword, 3: quoted literal.
      PUBLIC = /^\s*#{START}\s+(\w[\w-]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
      SYSTEM = /^\s*#{START}\s+(\w[\w-]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um

      # Parses a notation declaration from +src+, trying PUBLIC first and
      # SYSTEM second.  Raises ParseException if neither matches.
      def initialize src
        super()
        pattern = [PUBLIC, SYSTEM].find { |candidate| src.match( candidate ) }
        if pattern.nil?
          raise ParseException.new( "error parsing notation: no matching pattern", src )
        end
        md = src.match( pattern, true )
        @name = md[1]
        @middle = md[2]
        @rest = md[3]
      end

      # The declaration rebuilt from its three captured parts.
      def to_s
        "<!NOTATION #{@name} #{@middle} #{@rest}>"
      end

      # Appends the declaration to +output+ after calling the inherited
      # indent helper.
      def write( output, indent )
        indent( output, indent )
        output << to_s
      end

      # NOTE(review): PATTERN_RE is not defined in this class as shown;
      # verify before relying on this method.
      def NotationDecl.parse_source source, listener
        md = source.match( PATTERN_RE, true )
        thing = md[0].squeeze(" \t\n\r")
        listener.send( inspect.downcase, thing )
      end
    end
  end
end

1244
vendor/plugins/rexml/lib/rexml/element.rb vendored Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,71 @@
# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
module REXML
  # Mixin providing an +encoding+ attribute plus pluggable +encode+ and
  # +decode+ methods.  Converters are registered per encoding name with
  # Encoding.register and installed on an object by Encoding.apply.
  module Encoding
    # Installer blocks, keyed by encoding name.
    @encoding_methods = {}

    # Stores +block+ as the installer for the encoding named +enc+.
    def self.register(enc, &block)
      @encoding_methods[enc] = block
    end

    # Calls the installer registered under +enc+ with +obj+.
    def self.apply(obj, enc)
      @encoding_methods[enc][obj]
    end

    # Returns the installer registered under +enc+, or nil.
    def self.encoding_method(enc)
      @encoding_methods[enc]
    end

    # Native, default format is UTF-8, so it is declared here rather than in
    # an encodings/ definition.
    UTF_8 = 'UTF-8'
    UTF_16 = 'UTF-16'
    UNILE = 'UNILE'

    # ID ---> Encoding name
    attr_reader :encoding

    # Switches this object to the encoding named +enc+ (nil means UTF-8).
    #
    # Returns false when the encoding is unchanged, true once a converter
    # has been installed.  Raises ArgumentError for a malformed name or when
    # no converter can be loaded.
    def encoding=( enc )
      old_verbosity = $VERBOSE
      begin
        $VERBOSE = false
        enc = enc.nil? ? nil : enc.upcase
        return false if defined?(@encoding) && enc == @encoding
        if enc and enc != UTF_8
          # The name ends up in a require path below, so validate the WHOLE
          # string (\A..\z, not per-line ^..$) and do it before storing it.
          raise ArgumentError, "Bad encoding name #{enc}" unless enc =~ /\A[\w-]+\z/
          @encoding = enc
          # Object#untaint does not exist on newer Rubies.
          @encoding.untaint if @encoding.respond_to?(:untaint)
          begin
            require 'rexml/encodings/ICONV.rb'
            Encoding.apply(self, "ICONV")
          rescue ScriptError, StandardError
            # Iconv is unavailable or rejected the encoding: fall back to a
            # pure-Ruby converter.  Signals, exit requests and out-of-memory
            # conditions are deliberately not swallowed here.
            begin
              enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
              require enc_file
              Encoding.apply(self, @encoding)
            rescue LoadError => err
              # Diagnostics go to stderr so they cannot end up in the middle
              # of a document being written to stdout.
              warn err.message
              raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
            end
          end
        else
          @encoding = UTF_8
          require 'rexml/encodings/UTF-8.rb'
          Encoding.apply(self, @encoding)
        end
      ensure
        $VERBOSE = old_verbosity
      end
      true
    end

    # Guesses the encoding of +str+.  A leading UTF-16 byte order mark is
    # removed from +str+ in place and reported as UTF_16 (FE FF) or UNILE
    # (FF FE); otherwise the encoding named in an XML declaration is
    # returned upper-cased, defaulting to UTF_8.
    def check_encoding str
      # We have to recognize UTF-16, LSB UTF-16, and UTF-8
      if str[0,2] == "\xfe\xff"
        str[0,2] = ""
        return UTF_16
      elsif str[0,2] == "\xff\xfe"
        str[0,2] = ""
        return UNILE
      end
      str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m
      return $3.upcase if $3
      return UTF_8
    end
  end
end

View file

@ -0,0 +1,103 @@
#
# This class was contributed by Mikko Tiihonen mikko DOT tiihonen AT hut DOT fi
#
module REXML
module Encoding
# Installer for "CP-1252": aliases +encode+ and +decode+ on the singleton
# class of +o+ to the CP-1252 converters of this module.
register( "CP-1252" ) do |o|
class << o
alias encode encode_cp1252
alias decode decode_cp1252
end
end
# Code points above U+00FF that have a single-byte CP-1252 representation.
CP1252_BYTE_FOR_CODEPOINT = {
  0x20AC => 0x80, 0x201A => 0x82, 0x0192 => 0x83, 0x201E => 0x84,
  0x2026 => 0x85, 0x2020 => 0x86, 0x2021 => 0x87, 0x02C6 => 0x88,
  0x2030 => 0x89, 0x0160 => 0x8A, 0x2039 => 0x8B, 0x0152 => 0x8C,
  0x017D => 0x8E, 0x2018 => 0x91, 0x2019 => 0x92, 0x201C => 0x93,
  0x201D => 0x94, 0x2022 => 0x95, 0x2013 => 0x96, 0x2014 => 0x97,
  0x02DC => 0x98, 0x2122 => 0x99, 0x0161 => 0x9A, 0x203A => 0x9B,
  0x0153 => 0x9C, 0x017E => 0x9E, 0x0178 => 0x9F
}.freeze

# Convert from UTF-8
#
# content:: a UTF-8 string
#
# Returns the CP-1252 bytes.  Code points up to U+00FF are emitted as the
# byte of the same value, the code points in the table above as their
# CP-1252 byte, and everything else as a numeric character reference.
# (U+0153 used to be missing because of a duplicated 0x0152 entry.)
def encode_cp1252(content)
  array_enc = []
  content.unpack('U*').each do |num|
    if num <= 0xFF
      array_enc << num
    elsif CP1252_BYTE_FOR_CODEPOINT.key?(num)
      array_enc << CP1252_BYTE_FOR_CODEPOINT[num]
    else
      # Numeric entity (&#nnnn;); shard by Stefan Scholl
      array_enc.concat "&\##{num};".unpack('C*')
    end
  end
  array_enc.pack('C*')
end
# CP-1252 bytes whose Unicode code point differs from the byte value.
CP1252_CODEPOINT_FOR_BYTE = {
  0x80 => 0x20AC, 0x82 => 0x201A, 0x83 => 0x0192, 0x84 => 0x201E,
  0x85 => 0x2026, 0x86 => 0x2020, 0x87 => 0x2021, 0x88 => 0x02C6,
  0x89 => 0x2030, 0x8A => 0x0160, 0x8B => 0x2039, 0x8C => 0x0152,
  0x8E => 0x017D, 0x91 => 0x2018, 0x92 => 0x2019, 0x93 => 0x201C,
  0x94 => 0x201D, 0x95 => 0x2022, 0x96 => 0x2013, 0x97 => 0x2014,
  0x98 => 0x02DC, 0x99 => 0x2122, 0x9A => 0x0161, 0x9B => 0x203A,
  0x9C => 0x0153, 0x9E => 0x017E, 0x9F => 0x0178
}.freeze

# Convert to UTF-8
#
# str:: a string of CP-1252 bytes
#
# Returns the UTF-8 string.  Bytes listed in the table above are mapped to
# their Unicode code point; every other byte maps to the code point of the
# same value.  (0x9C used to decode to U+0152 instead of U+0153.)
def decode_cp1252(str)
  array_enc = str.unpack('C*').collect do |num|
    CP1252_CODEPOINT_FOR_BYTE.fetch(num, num)
  end
  array_enc.pack('U*')
end
end
end

View file

@ -0,0 +1,35 @@
# EUC-JP converters for REXML::Encoding.
module REXML
module Encoding
# Prefer the 'uconv' library; when it cannot be loaded the rescue clause
# below defines the same two methods on top of 'nkf' instead.
begin
require 'uconv'
# Convert to UTF-8
def decode_eucjp(str)
Uconv::euctou8(str)
end
# Convert from UTF-8
def encode_eucjp content
Uconv::u8toeuc(content)
end
rescue LoadError
require 'nkf'
# Option strings handed to NKF.nkf for each direction.
EUCTOU8 = '-Ewm0'
U8TOEUC = '-Wem0'
# Convert to UTF-8
def decode_eucjp(str)
NKF.nkf(EUCTOU8, str)
end
# Convert from UTF-8
def encode_eucjp content
NKF.nkf(U8TOEUC, content)
end
end
# Installer: aliases +decode+ and +encode+ on the singleton class of +obj+
# to the EUC-JP converters defined above.
register("EUC-JP") do |obj|
class << obj
alias decode decode_eucjp
alias encode encode_eucjp
end
end
end
end

View file

@ -0,0 +1,22 @@
require "iconv"
raise LoadError unless defined? Iconv
# Iconv-backed converters for REXML::Encoding.
module REXML
module Encoding
# Convert +str+ from the receiver's @encoding to UTF-8.
def decode_iconv(str)
Iconv.conv(UTF_8, @encoding, str)
end
# Convert +content+ from UTF-8 to the receiver's @encoding.
def encode_iconv(content)
Iconv.conv(@encoding, UTF_8, content)
end
# Installer: aliases +decode+ and +encode+ on the singleton class of +obj+
# to the Iconv converters above.
register("ICONV") do |obj|
# Presumably a probe so that an encoding Iconv does not support raises
# here, before the aliases are installed -- TODO confirm.
Iconv.conv(UTF_8, obj.encoding, nil)
class << obj
alias decode decode_iconv
alias encode encode_iconv
end
end
end
end

View file

@ -0,0 +1,7 @@
require 'rexml/encodings/US-ASCII'
module REXML
module Encoding
# ISO-8859-1 reuses the installer block registered for "US-ASCII".
register("ISO-8859-1", &encoding_method("US-ASCII"))
end
end

View file

@ -0,0 +1,72 @@
#
# This class was contributed by Mikko Tiihonen mikko DOT tiihonen AT hut DOT fi
#
module REXML
module Encoding
# Installer for "ISO-8859-15".  The aliases are created on the singleton
# class of +o+, like every other encoding in this directory; a bare +alias+
# inside the block would instead alias the methods on the Encoding module
# itself and so affect every object that includes it.
register("ISO-8859-15") do |o|
  class << o
    alias encode to_iso_8859_15
    alias decode from_iso_8859_15
  end
end
# Code points that ISO-8859-15 places on a byte that means something else
# in ISO-8859-1.
ISO_8859_15_BYTE_FOR_CODEPOINT = {
  0x20AC => 0xA4, 0x0160 => 0xA6, 0x0161 => 0xA8, 0x017D => 0xB4,
  0x017E => 0xB8, 0x0152 => 0xBC, 0x0153 => 0xBD, 0x0178 => 0xBE
}.freeze

# Convert from UTF-8
#
# content:: a UTF-8 string
#
# Returns the ISO-8859-15 bytes.  The eight code points in the table above
# become their ISO-8859-15 byte.  The eight Latin-1 characters displaced by
# them (U+00A4, U+00A6, ...) and everything above U+00FF are written as
# numeric character references; all other code points up to U+00FF are
# emitted as the byte of the same value.
#
# The displaced characters used to be pushed onto the byte array as Strings,
# which made the final pack('C*') raise TypeError.
def to_iso_8859_15(content)
  displaced = ISO_8859_15_BYTE_FOR_CODEPOINT.values
  array_enc = []
  content.unpack('U*').each do |num|
    if ISO_8859_15_BYTE_FOR_CODEPOINT.key?(num)
      array_enc << ISO_8859_15_BYTE_FOR_CODEPOINT[num]
    elsif num <= 0xFF && !displaced.include?(num)
      array_enc << num
    else
      # Numeric entity (&#nnnn;); shard by Stefan Scholl
      array_enc.concat "&\##{num};".unpack('C*')
    end
  end
  array_enc.pack('C*')
end
# Convert to UTF-8
#
# str:: a string of ISO-8859-15 bytes
#
# Returns the UTF-8 string.  The eight bytes that differ from ISO-8859-1
# are remapped; every other byte maps to the code point of the same value.
def from_iso_8859_15(str)
  remapped = {
    0xA4 => 0x20AC, 0xA6 => 0x0160, 0xA8 => 0x0161, 0xB4 => 0x017D,
    0xB8 => 0x017E, 0xBC => 0x0152, 0xBD => 0x0153, 0xBE => 0x0178
  }
  codepoints = str.unpack('C*').collect { |byte| remapped.fetch(byte, byte) }
  codepoints.pack('U*')
end
end
end

View file

@ -0,0 +1,37 @@
# Shift-JIS converters for REXML::Encoding.
module REXML
module Encoding
# Prefer the 'uconv' library; when it cannot be loaded the rescue clause
# below defines the same two methods on top of 'nkf' instead.
begin
require 'uconv'
# Convert to UTF-8
def decode_sjis content
Uconv::sjistou8(content)
end
# Convert from UTF-8
def encode_sjis(str)
Uconv::u8tosjis(str)
end
rescue LoadError
require 'nkf'
# Option strings handed to NKF.nkf for each direction.
SJISTOU8 = '-Swm0'
U8TOSJIS = '-Wsm0'
# Convert to UTF-8
def decode_sjis(str)
NKF.nkf(SJISTOU8, str)
end
# Convert from UTF-8
def encode_sjis content
NKF.nkf(U8TOSJIS, content)
end
end
# One installer shared by both spellings of the encoding name: aliases
# +decode+ and +encode+ on the singleton class of +obj+.
b = proc do |obj|
class << obj
alias decode decode_sjis
alias encode encode_sjis
end
end
register("SHIFT-JIS", &b)
register("SHIFT_JIS", &b)
end
end

View file

@ -0,0 +1 @@
require 'rexml/encodings/SHIFT-JIS'

View file

@ -0,0 +1,34 @@
module REXML
module Encoding
# Convert from UTF-8 to two bytes per character, low byte first.
#
# content:: a UTF-8 string
#
# Code points that do not fit in 16 bits are replaced by "?".  The
# replacement is written as the integer 0x3F because the character literal
# ?? is a String on Ruby 1.9 and later, which pack('C*') rejects.
def encode_unile content
  array_enc = []
  content.unpack("U*").each do |num|
    if (num >> 16) > 0
      array_enc << 0x3F << 0
    else
      array_enc << (num & 0xFF) << (num >> 8)
    end
  end
  array_enc.pack('C*')
end
# Convert to UTF-8 from two bytes per character, low byte first.
#
# str:: a string whose bytes are read in pairs (low byte, high byte)
def decode_unile(str)
  bytes = str.unpack('C*')
  codepoints = []
  (0...bytes.size).step(2) do |i|
    codepoints << (bytes.at(i) + bytes.at(i+1)*0x100)
  end
  codepoints.pack('U*')
end
# Installer for UNILE: aliases +decode+ and +encode+ on the singleton class
# of +obj+ to the UNILE converters of this module.
register(UNILE) do |obj|
class << obj
alias decode decode_unile
alias encode encode_unile
end
end
end
end

View file

@ -0,0 +1,30 @@
module REXML
module Encoding
# Convert from UTF-8
#
# content:: a UTF-8 string
#
# Code points up to U+007F are emitted as the byte of the same value;
# everything else becomes a numeric character reference.
def encode_ascii content
  bytes = content.unpack('U*').inject([]) do |acc, codepoint|
    if codepoint > 0x7F
      # Numeric entity (&#nnnn;); shard by Stefan Scholl
      acc.concat "&\##{codepoint};".unpack('C*')
    else
      acc << codepoint
    end
  end
  bytes.pack('C*')
end
# Convert to UTF-8
#
# Each byte of +str+ becomes the code point of the same value.
def decode_ascii(str)
  bytes = str.unpack('C*')
  bytes.pack('U*')
end
# Installer for "US-ASCII": aliases +decode+ and +encode+ on the singleton
# class of +obj+ to the ASCII converters of this module.
register("US-ASCII") do |obj|
class << obj
alias decode decode_ascii
alias encode encode_ascii
end
end
end
end

View file

@ -0,0 +1,35 @@
module REXML
module Encoding
# Convert from UTF-8 to two bytes per character, high byte first.
#
# content:: a UTF-8 string
#
# Code points that do not fit in 16 bits are replaced by "?".  The
# replacement is written as the integer 0x3F because the character literal
# ?? is a String on Ruby 1.9 and later, which pack('C*') rejects.
def encode_utf16 content
  array_enc = []
  content.unpack("U*").each do |num|
    if (num >> 16) > 0
      array_enc << 0 << 0x3F
    else
      array_enc << (num >> 8) << (num & 0xFF)
    end
  end
  array_enc.pack('C*')
end
# Convert to UTF-8 from two bytes per character, high byte first.
#
# str:: a byte string, optionally starting with the FE FF byte order mark
#
# The byte order mark is only stripped when it is the very first thing in
# the string (\A).  With a ^ anchor the pattern also matched after any 0x0A
# byte inside the data, in which case the first two bytes of the input were
# dropped by mistake.
def decode_utf16(str)
  str = str[2..-1] if /\A\376\377/n =~ str
  array_enc=str.unpack('C*')
  array_utf8 = []
  0.step(array_enc.size-1, 2){|i|
    array_utf8 << (array_enc.at(i+1) + array_enc.at(i)*0x100)
  }
  array_utf8.pack('U*')
end
# Installer for UTF_16: aliases +decode+ and +encode+ on the singleton class
# of +obj+ to the UTF-16 converters of this module.
register(UTF_16) do |obj|
class << obj
alias decode decode_utf16
alias encode encode_utf16
end
end
end
end

View file

@ -0,0 +1,18 @@
module REXML
module Encoding
def encode_utf8 content
content
end
def decode_utf8(str)
str
end
register(UTF_8) do |obj|
class << obj
alias decode decode_utf8
alias encode encode_utf8
end
end
end
end

165
vendor/plugins/rexml/lib/rexml/entity.rb vendored Normal file
View file

@ -0,0 +1,165 @@
require 'rexml/child'
require 'rexml/source'
require 'rexml/xmltokens'
module REXML
# God, I hate DTDs. I really do. Why this idiot standard still
# plagues us is beyond me.
class Entity < Child
include XMLTokens
# Regexp source fragments for entity declarations.  NAME and REFERENCE are
# not defined in this file; presumably they come from the included
# XMLTokens module.
PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
NDATADECL = "\\s+NDATA\\s+#{NAME}"
PEREFERENCE = "%#{NAME};"
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
# The alternation is grouped so that the optional leading whitespace applies
# to general AND parameter entity declarations.  Ungrouped, "\s*" belonged
# to the GEDECL branch only, so a parameter entity declaration preceded by
# whitespace did not match at offset 0.  Only non-capturing groups were
# added, so capture numbering is unchanged.
ENTITYDECL = /\s*(?:(?:#{GEDECL})|(?:#{PEDECL}))/um
attr_reader :name, :external, :ref, :ndata, :pubid
# Create a new entity.  Simple entities can be constructed by passing a
# name, value to the constructor; this creates a generic, plain entity
# reference. For anything more complicated, you have to pass an Array
# describing the entity definition, or use the accessor methods.
# +WARNING+: There is no validation of entity state except when the entity
# is read from a stream.  If you start poking around with the accessors,
# you can easily create a non-conformant Entity.
#
#  e = Entity.new( 'amp', '&' )
#
# stream::
#   Either the entity name, or an Array laid out as one of
#     [ x, name, value ]
#     [ x, name, 'SYSTEM', ref ]  (optionally followed by an NDATA name)
#     [ x, name, 'PUBLIC', pubid, ref ]
#   where index 0 is not read here.  A trailing '%' element marks a
#   parameter entity and is popped off the array (the caller's array is
#   modified).
# value::     the entity value; only used when +stream+ is not an Array
# parent::    passed on to the superclass constructor
# reference:: parameter-entity flag; only used when +stream+ is not an Array
def initialize stream, value=nil, parent=nil, reference=false
  super(parent)
  @ndata = @pubid = @value = @external = nil
  if stream.kind_of? Array
    @name = stream[1]
    if stream[-1] == '%'
      @reference = true
      stream.pop
    else
      @reference = false
    end
    # Compare against the keywords exactly.  An unanchored /SYSTEM|PUBLIC/
    # also fired for an internal entity whose *value* merely contained one
    # of those words, turning it into a bogus external entity.
    if stream[2] == 'SYSTEM' || stream[2] == 'PUBLIC'
      @external = stream[2]
      if @external == 'SYSTEM'
        @ref = stream[3]
        @ndata = stream[4] if stream.size == 5
      else
        @pubid = stream[3]
        @ref = stream[4]
      end
    else
      @value = stream[2]
    end
  else
    @reference = reference
    @external = nil
    @name = stream
    @value = value
  end
end
# Evaluates whether the given string matches an entity definition at its
# very start, returning true if so, and false otherwise.
def Entity::matches? string
  offset = (ENTITYDECL =~ string)
  offset == 0
end
# Evaluates to the unnormalized value of this entity; that is, replacing
# all entities -- both %ent; and &ent; entities.  This differs from
# +value()+ in that +value+ only replaces %ent; entities.
#
# Returns nil when +value+ is nil; otherwise the result of
# Text::unnormalize, which is also stored in @unnormalized.
def unnormalized
  raw = value()
  return nil if raw.nil?
  @unnormalized = Text::unnormalize(raw, parent)
end
#once :unnormalized
# Returns the value of this entity unprocessed -- raw.  This is the
# normalized value; that is, with all %ent; and &ent; entities intact.
# Nothing is expanded here: the stored @value is returned as is (nil when
# no value was set).
def normalized
@value
end
# Write out a fully formed, correct entity definition (assuming the Entity
# object itself is valid.)
#
# out::
#   An object implementing <TT>&lt;&lt;</TT> to which the entity will be
#   output
# indent::
#   *DEPRECATED* and ignored
#
# Each literal is wrapped in double quotes unless it contains a double
# quote itself, in which case single quotes are used.
def write out, indent=-1
  pick_quote = lambda { |text| text.include?('"') ? "'" : '"' }
  out << '<!ENTITY '
  out << '% ' if @reference
  out << @name
  out << ' '
  unless @external
    # Internal entity: just the quoted value.
    quote = pick_quote.call(@value)
    out << quote << @value << quote
    return out << '>'
  end
  out << @external << ' '
  if @pubid
    quote = pick_quote.call(@pubid)
    out << quote << @pubid << quote << ' '
  end
  quote = pick_quote.call(@ref)
  out << quote << @ref << quote
  out << ' NDATA ' << @ndata if @ndata
  out << '>'
end
# Returns this entity as a string.  See write().
# The declaration is produced by handing a fresh, empty String to write().
def to_s
rv = ''
write rv
rv
end
PEREFERENCE_RE = /#{PEREFERENCE}/um
# Returns the value of this entity.  At the moment, only internal entities
# are processed.  If the value contains internal references (IE,
# %blah;), those are replaced with their values.  IE, if the doctype
# contains:
#  <!ENTITY % foo "bar">
#  <!ENTITY yada "nanoo %foo; nanoo">
# then:
#  doctype.entity('yada').value   #-> "nanoo bar nanoo"
#
# Returns nil when the entity has no value.  The result is always a copy;
# without a parent the references are left unexpanded.
def value
  return nil unless @value
  references = @value.scan(PEREFERENCE_RE)
  expanded = @value.clone
  return expanded unless @parent
  references.each do |captures|
    replacement = @parent.entity( captures[0] )
    expanded.gsub!( /%#{captures.join};/um, replacement )
  end
  expanded
end
end
# This is a set of entity constants -- the ones defined in the XML
# specification.  These are +gt+, +lt+, +amp+, +quot+ and +apos+.
# Each one is built with the simple (name, value) form of Entity.new.
module EntityConst
# +>+
GT = Entity.new( 'gt', '>' )
# +<+
LT = Entity.new( 'lt', '<' )
# +&+
AMP = Entity.new( 'amp', '&' )
# +"+
QUOT = Entity.new( 'quot', '"' )
# +'+
APOS = Entity.new( 'apos', "'" )
end
end

View file

@ -0,0 +1,109 @@
module REXML
  module Formatters
    # Formatter that writes nodes without adding any whitespace of its own
    # (apart from the optional ie_hack space).
    class Default
      # Prints out the XML document with no formatting -- except if ie_hack
      # is set.
      #
      # ie_hack::
      #   If set to true, then inserts whitespace before the close of an empty
      #   tag, so that IE's bad XML parser doesn't choke.
      def initialize( ie_hack=false )
        @ie_hack = ie_hack
      end

      # Writes the node to some output.
      #
      # node::
      #   The node to write
      # output::
      #   A class implementing <TT>&lt;&lt;</TT>.  Pass in an Output object to
      #   change the output encoding.
      #
      # Raises Exception for a node of an unrecognised class.  The order of
      # the +when+ clauses is significant and must be kept.
      def write( node, output )
        case node
        when Document
          unless node.xml_decl.encoding == "UTF-8" || output.kind_of?(Output)
            output = Output.new( output, node.xml_decl.encoding )
          end
          write_document( node, output )
        when Element then write_element( node, output )
        when Declaration, ElementDecl, NotationDecl, ExternalEntity, Entity,
             Attribute, AttlistDecl
          node.write( output,-1 )
        when Instruction then write_instruction( node, output )
        when DocType, XMLDecl then node.write( output )
        when Comment then write_comment( node, output )
        when CData then write_cdata( node, output )
        when Text then write_text( node, output )
        else
          raise Exception, "XML FORMATTING ERROR"
        end
      end

      protected

      # Writes every child of the document in order.
      def write_document( node, output )
        node.children.each do |child|
          write( child, output )
        end
      end

      # Writes the start tag (attributes sorted by name), the children and
      # the end tag; an element without children is written as an empty tag.
      def write_element( node, output )
        output << "<#{node.expanded_name}"
        unless node.attributes.empty?
          sorted = node.attributes.to_a.sort_by { |attribute| attribute.name }
          sorted.each do |attribute|
            output << " "
            attribute.write( output )
          end
        end
        if !node.children.empty?
          output << ">"
          node.children.each do |child|
            write( child, output )
          end
          output << "</#{node.expanded_name}"
        else
          output << " " if @ie_hack
          output << "/"
        end
        output << ">"
      end

      def write_text( node, output )
        output << node.to_s
      end

      def write_comment( node, output )
        output << Comment::START
        output << node.to_s
        output << Comment::STOP
      end

      def write_cdata( node, output )
        output << CData::START
        output << node.to_s
        output << CData::STOP
      end

      # Instruction::START and STOP have the first backslash removed before
      # they are written.
      def write_instruction( node, output )
        opening = Instruction::START.sub(/\\/u, '')
        closing = Instruction::STOP.sub(/\\/u, '')
        output << opening
        output << node.target
        output << ' '
        output << node.content
        output << closing
      end
    end
  end
end

View file

@ -0,0 +1,138 @@
require 'rexml/formatters/default'
module REXML
module Formatters
# Pretty-prints an XML document. This destroys whitespace in text nodes
# and will insert carriage returns and indentations.
#
# TODO: Add an option to print attributes on new lines
class Pretty < Default
# If compact is set to true, then the formatter will attempt to use as
# little space as possible
attr_accessor :compact
# The width of a page. Used for formatting text
attr_accessor :width
# Create a new pretty printer.
#
# indentation::
#   An integer greater than 0.  The indentation of each level will be
#   this number of spaces.  If this is < 1, the behavior of this object
#   is undefined.  Defaults to 2.
# ie_hack::
#   If true, the printer will insert whitespace before closing empty
#   tags, thereby allowing Internet Explorer's feeble XML parser to
#   function. Defaults to false.
#
# The constructor takes no output argument; the output object is passed to
# each write call instead.  The page width starts at 80 and compact mode
# starts off; both can be changed through their accessors.
def initialize( indentation=2, ie_hack=false )
@indentation = indentation
@level = 0
@ie_hack = ie_hack
@width = 80
@compact = false
end
protected
# Writes +node+ indented by the current level.  Children normally go on
# their own lines, one level deeper, and whitespace-only Text children are
# dropped.  In compact mode, an element whose children are all Text and
# whose rendered content is shorter than @width is kept on a single line.
def write_element(node, output)
  output << ' '*@level
  output << "<#{node.expanded_name}"
  unless node.attributes.empty?
    node.attributes.each_attribute do |attribute|
      output << " "
      attribute.write( output )
    end
  end
  if node.children.empty?
    output << " " if @ie_hack
    output << "/"
  else
    output << ">"
    on_one_line = false
    if compact && node.children.all? { |child| child.kind_of?(Text) }
      # Render the text at level 0 into a scratch string to measure it.
      rendered = ""
      saved_level = @level
      @level = 0
      node.children.each { |child| write( child, rendered ) }
      @level = saved_level
      if rendered.length < @width
        output << rendered
        on_one_line = true
      end
    end
    if !on_one_line
      output << "\n"
      @level += @indentation
      node.children.each do |child|
        next if child.kind_of?(Text) and child.to_s.strip.length == 0
        write( child, output )
        output << "\n"
      end
      @level -= @indentation
      output << ' '*@level
    end
    output << "</#{node.expanded_name}"
  end
  output << ">"
end
# Writes the text of +node+ with every whitespace character turned into a
# space, runs of spaces collapsed, wrapped to the configured page width
# (less the current indentation) and indented to the current level.
#
# Two fixes compared with the previous version: the wrap width honours the
# +width+ accessor instead of a hard-coded 80, and the string returned by
# node.to_s is no longer modified in place.
def write_text( node, output )
  s = node.to_s().gsub(/\s/,' ').squeeze(" ")
  s = wrap(s, @width - @level)
  s = indent_text(s, @level, " ", true)
  output << (' '*@level + s)
end
# Emits the indentation for the current level, then lets the superclass
# implementation write the comment itself.
def write_comment( node, output)
output << ' ' * @level
super
end
# Emits the indentation for the current level, then lets the superclass
# implementation write the CDATA section itself.
def write_cdata( node, output)
output << ' ' * @level
super
end
# Writes the children of the document, separating them with newlines.
#
# Ok, this is a bit odd.  All XML documents have an XML declaration,
# but it may not write itself if the user didn't specifically add it,
# either through the API or in the input document.  If it doesn't write
# itself, then we don't need a carriage return... which makes this
# logic more complex.
#
# A trailing Text child is not written at all, and no newline is put in
# front of the first child, of a Text child, or of the second child when
# the first one reports a false +writethis+.
def write_document( node, output )
  node.children.each do |child|
    next if child == node.children[-1] and child.instance_of?(Text)
    needs_newline = !(child == node.children[0]) &&
                    !child.instance_of?(Text) &&
                    !(child == node.children[1] and !node.children[0].writethis)
    output << "\n" if needs_newline
    write( child, output )
  end
end
private
# Inserts +level+ copies of +style+ after every newline in +string+.  The
# first line is left alone (+indentfirstline+ is accepted but not used).
# A negative +level+ returns +string+ itself.
def indent_text(string, level=1, style="\t", indentfirstline=true)
  if level < 0
    string
  else
    string.gsub(/\n/, "\n#{style*level}")
  end
end
# Wraps +string+ by replacing, for each line, the last space found at or
# before +width+ with a newline.
#
# When no such space exists (for instance a single word longer than
# +width+) the rest of the text is left on one over-long line; this case
# used to raise a TypeError.  The loop also replaces the earlier recursion,
# so very long texts cannot exhaust the stack.
def wrap(string, width)
  lines = []
  while string.length > width and place = string.rindex(' ', width)
    lines << string[0,place]
    string = string[place+1..-1]
  end
  lines << string
  lines.join("\n")
end
end
end
end

View file

@ -0,0 +1,56 @@
require 'rexml/formatters/pretty'
module REXML
module Formatters
# The Transitive formatter writes an XML document that parses to an
# identical document as the source document. This means that no extra
# whitespace nodes are inserted, and whitespace within text nodes is
# preserved. Within these constraints, the document is pretty-printed,
# with whitespace inserted into the metadata to introduce formatting.
#
# Note that this is only useful if the original XML is not already
# formatted. Since this formatter does not alter whitespace nodes, the
# results of formatting already formatted XML will be odd.
class Transitive < Default
# indentation::
#   How much the level counter grows for each level of element nesting
#   (default 2).  The counter itself starts at 0.
def initialize( indentation=2 )
@indentation = indentation
@level = 0
end
protected
# Writes +node+ with the line break and indentation placed INSIDE the tags,
# just before the closing ">" (or "/>"), so that no whitespace is added to
# the element content.
#
# (An unused local and a comment about compact mode, both left over from
# the Pretty formatter, have been removed; the output is unchanged.)
def write_element( node, output )
  output << "<#{node.expanded_name}"
  node.attributes.each_attribute do |attr|
    output << " "
    attr.write( output )
  end unless node.attributes.empty?
  output << "\n"
  output << ' '*@level
  if node.children.empty?
    output << "/"
  else
    output << ">"
    @level += @indentation
    node.children.each { |child|
      write( child, output )
    }
    @level -= @indentation
    output << "</#{node.expanded_name}"
    output << "\n"
    output << ' '*@level
  end
  output << ">"
end
# Text is written exactly as node.to_s returns it; no whitespace is touched.
def write_text( node, output )
output << node.to_s()
end
end
end
end

View file

@ -0,0 +1,388 @@
module REXML
# If you add a method, keep in mind two things:
# (1) the first argument will always be a list of nodes from which to
# filter. In the case of context methods (such as position), the function
# should return an array with a value for each child in the array.
# (2) all method calls from XML will have "-" replaced with "_".
# Therefore, in XML, "local-name()" is identical (and actually becomes)
# "local_name()"
module Functions
@@context = nil
@@namespace_context = {}
@@variables = {}
def Functions::namespace_context=(x) ; @@namespace_context=x ; end
def Functions::variables=(x) ; @@variables=x ; end
def Functions::namespace_context ; @@namespace_context ; end
def Functions::variables ; @@variables ; end
def Functions::context=(value); @@context = value; end
def Functions::text( )
if @@context[:node].node_type == :element
return @@context[:node].find_all{|n| n.node_type == :text}.collect{|n| n.value}
elsif @@context[:node].node_type == :text
return @@context[:node].value
else
return false
end
end
def Functions::last( )
@@context[:size]
end
def Functions::position( )
@@context[:index]
end
def Functions::count( node_set )
node_set.size
end
# Since REXML is non-validating, this method is not implemented as it
# requires a DTD
def Functions::id( object )
end
# UNTESTED
def Functions::local_name( node_set=nil )
get_namespace( node_set ) do |node|
return node.local_name
end
end
def Functions::namespace_uri( node_set=nil )
get_namespace( node_set ) {|node| node.namespace}
end
def Functions::name( node_set=nil )
get_namespace( node_set ) do |node|
node.expanded_name
end
end
# Helper method.
def Functions::get_namespace( node_set = nil )
if node_set == nil
yield @@context[:node] if defined? @@context[:node].namespace
else
if node_set.respond_to? :each
node_set.each { |node| yield node if defined? node.namespace }
elsif node_set.respond_to? :namespace
yield node_set
end
end
end
# A node-set is converted to a string by returning the string-value of the
# node in the node-set that is first in document order. If the node-set is
# empty, an empty string is returned.
#
# A number is converted to a string as follows
#
# NaN is converted to the string NaN
#
# positive zero is converted to the string 0
#
# negative zero is converted to the string 0
#
# positive infinity is converted to the string Infinity
#
# negative infinity is converted to the string -Infinity
#
# if the number is an integer, the number is represented in decimal form
# as a Number with no decimal point and no leading zeros, preceded by a
# minus sign (-) if the number is negative
#
# otherwise, the number is represented in decimal form as a Number
# including a decimal point with at least one digit before the decimal
# point and at least one digit after the decimal point, preceded by a
# minus sign (-) if the number is negative; there must be no leading zeros
# before the decimal point apart possibly from the one required digit
# immediately before the decimal point; beyond the one required digit
# after the decimal point there must be as many, but only as many, more
# digits as are needed to uniquely distinguish the number from all other
# IEEE 754 numeric values.
#
# The boolean false value is converted to the string false. The boolean
# true value is converted to the string true.
#
# An object of a type other than the four basic types is converted to a
# string in a way that is dependent on that type.
def Functions::string( object=nil )
#object = @context unless object
if object.instance_of? Array
string( object[0] )
elsif defined? object.node_type
if object.node_type == :attribute
object.value
elsif object.node_type == :element || object.node_type == :document
string_value(object)
else
object.to_s
end
elsif object.nil?
return ""
else
object.to_s
end
end
def Functions::string_value( o )
rv = ""
o.children.each { |e|
if e.node_type == :text
rv << e.to_s
elsif e.node_type == :element
rv << string_value( e )
end
}
rv
end
# UNTESTED
def Functions::concat( *objects )
objects.join
end
# Fixed by Mike Stok
def Functions::starts_with( string, test )
string(string).index(string(test)) == 0
end
# Fixed by Mike Stok
def Functions::contains( string, test )
string(string).include?(string(test))
end
# Kouhei fixed this
def Functions::substring_before( string, test )
ruby_string = string(string)
ruby_index = ruby_string.index(string(test))
if ruby_index.nil?
""
else
ruby_string[ 0...ruby_index ]
end
end
# Kouhei fixed this too
def Functions::substring_after( string, test )
ruby_string = string(string)
test_string = string(test)
return $1 if ruby_string =~ /#{test}(.*)/
""
end
# Take equal portions of Mike Stok and Sean Russell; mix
# vigorously, and pour into a tall, chilled glass. Serves 10,000.
def Functions::substring( string, start, length=nil )
ruby_string = string(string)
ruby_length = if length.nil?
ruby_string.length.to_f
else
number(length)
end
ruby_start = number(start)
# Handle the special cases
return '' if (
ruby_length.nan? or
ruby_start.nan? or
ruby_start.infinite?
)
infinite_length = ruby_length.infinite? == 1
ruby_length = ruby_string.length if infinite_length
# Now, get the bounds. The XPath bounds are 1..length; the ruby bounds
# are 0..length. Therefore, we have to offset the bounds by one.
ruby_start = ruby_start.round - 1
ruby_length = ruby_length.round
if ruby_start < 0
ruby_length += ruby_start unless infinite_length
ruby_start = 0
end
return '' if ruby_length <= 0
ruby_string[ruby_start,ruby_length]
end
# UNTESTED
def Functions::string_length( string )
string(string).length
end
# UNTESTED
def Functions::normalize_space( string=nil )
string = string(@@context[:node]) if string.nil?
if string.kind_of? Array
string.collect{|x| string.to_s.strip.gsub(/\s+/um, ' ') if string}
else
string.to_s.strip.gsub(/\s+/um, ' ')
end
end
# This is entirely Mike Stok's beast
def Functions::translate( string, tr1, tr2 )
from = string(tr1)
to = string(tr2)
# the map is our translation table.
#
# if a character occurs more than once in the
# from string then we ignore the second &
# subsequent mappings
#
# if a charactcer maps to nil then we delete it
# in the output. This happens if the from
# string is longer than the to string
#
# there's nothing about - or ^ being special in
# http://www.w3.org/TR/xpath#function-translate
# so we don't build ranges or negated classes
map = Hash.new
0.upto(from.length - 1) { |pos|
from_char = from[pos]
unless map.has_key? from_char
map[from_char] =
if pos < to.length
to[pos]
else
nil
end
end
}
if ''.respond_to? :chars
string(string).chars.collect { |c|
if map.has_key? c then map[c] else c end
}.compact.join
else
string(string).unpack('U*').collect { |c|
if map.has_key? c then map[c] else c end
}.compact.pack('U*')
end
end
# UNTESTED
def Functions::boolean( object=nil )
if object.kind_of? String
if object =~ /\d+/u
return object.to_f != 0
else
return object.size > 0
end
elsif object.kind_of? Array
object = object.find{|x| x and true}
end
return object ? true : false
end
# UNTESTED
def Functions::not( object )
not boolean( object )
end
# UNTESTED
def Functions::true( )
true
end
# UNTESTED
def Functions::false( )
false
end
# UNTESTED
def Functions::lang( language )
lang = false
node = @@context[:node]
attr = nil
until node.nil?
if node.node_type == :element
attr = node.attributes["xml:lang"]
unless attr.nil?
lang = compare_language(string(language), attr)
break
else
end
end
node = node.parent
end
lang
end
def Functions::compare_language lang1, lang2
lang2.downcase.index(lang1.downcase) == 0
end
# a string that consists of optional whitespace followed by an optional
# minus sign followed by a Number followed by whitespace is converted to
# the IEEE 754 number that is nearest (according to the IEEE 754
# round-to-nearest rule) to the mathematical value represented by the
# string; any other string is converted to NaN
#
# boolean true is converted to 1; boolean false is converted to 0
#
# a node-set is first converted to a string as if by a call to the string
# function and then converted in the same way as a string argument
#
# an object of a type other than the four basic types is converted to a
# number in a way that is dependent on that type
def Functions::number( object=nil )
object = @@context[:node] unless object
case object
when true
Float(1)
when false
Float(0)
when Array
number(string( object ))
when Numeric
object.to_f
else
str = string( object )
# If XPath ever gets scientific notation...
#if str =~ /^\s*-?(\d*\.?\d+|\d+\.)([Ee]\d*)?\s*$/
if str =~ /^\s*-?(\d*\.?\d+|\d+\.)\s*$/
str.to_f
else
(0.0 / 0.0)
end
end
end
def Functions::sum( nodes )
nodes = [nodes] unless nodes.kind_of? Array
nodes.inject(0) { |r,n| r += number(string(n)) }
end
def Functions::floor( number )
number(number).floor
end
def Functions::ceiling( number )
number(number).ceil
end
def Functions::round( number )
  # Try the rounded value first; when Float#round refuses the value with a
  # FloatDomainError, answer with the plain number() conversion instead.
  return number(number).round
rescue FloatDomainError
  return number(number)
end
def Functions::processing_instruction( node )
  # True when the node reports itself as a processing instruction.
  (node.node_type == :processing_instruction)
end
def Functions::method_missing( id )
  # Unknown function names are reported on stdout and then evaluated as an
  # XPath expression against the current context node.
  name = id.id2name
  puts "METHOD MISSING #{name}"
  XPath.match( @@context[:node], name )
end
end
end

View file

@ -0,0 +1,70 @@
require "rexml/child"
require "rexml/source"
module REXML
# Represents an XML Instruction; IE, <? ... ?>
# TODO: Add parent arg (3rd arg) to constructor
class Instruction < Child
  # Delimiters written as regexp source; #write removes the escaping
  # backslash to obtain the literal "<?" and "?>".
  START = '<\?'
  STOP = '\?>'

  # target is the "name" of the Instruction; IE, the "tag" in <?tag ...?>
  # content is everything else.
  attr_accessor :target, :content

  # Constructs a new Instruction
  # @param target either a String or an Instruction.  If a String, the
  # target of this instruction is set to it.  If an Instruction, that
  # instruction's target and content are copied.  Any other kind of
  # argument is ignored: target and content stay unset and the superclass
  # constructor is not called.
  # @param content if target is a String, the content of this instruction
  # (a String or nil).  If target is an Instruction, this argument is
  # passed to the superclass constructor instead.
  def initialize(target, content=nil)
    if target.kind_of? String
      super()
      @target = target
      @content = content
    elsif target.kind_of? Instruction
      super(content)
      @target = target.target
      @content = target.content
    end
    # Strip into a new String rather than in place: the caller's string
    # (or the content of the instruction being copied) is left untouched,
    # and frozen strings are accepted.
    @content = @content.strip if @content
  end

  # @return a new Instruction with the same target and content
  def clone
    Instruction.new self
  end

  # == DEPRECATED
  # See the rexml/formatters package
  #
  # Writes "<?target content?>" to writer after emitting a deprecation
  # warning.  transitive and ie_hack are not used.
  def write writer, indent=-1, transitive=false, ie_hack=false
    Kernel.warn( "#{self.class.name}.write is deprecated" )
    indent(writer, indent)
    writer << START.sub(/\\/u, '')
    writer << @target
    writer << ' '
    writer << @content
    writer << STOP.sub(/\\/u, '')
  end

  # @return true if other is an Instruction, and the content and target
  # of the other matches the target and content of this object.
  def ==( other )
    other.kind_of? Instruction and
    other.target == @target and
    other.content == @content
  end

  def node_type
    :processing_instruction
  end

  def inspect
    "<?p-i #{target} ...?>"
  end
end
end

View file

@ -0,0 +1,196 @@
require 'rexml/xmltokens'
require 'rexml/light/node'
# [ :element, parent, name, attributes, children* ]
# a = Node.new
# a << "B" # => <a>B</a>
# a.b # => <a>B<b/></a>
# a.b[1] # => <a>B<b/><b/><a>
# a.b[1]["x"] = "y" # => <a>B<b/><b x="y"/></a>
# a.b[0].c # => <a>B<b><c/></b><b x="y"/></a>
# a.b.c << "D" # => <a>B<b><c>D</c></b><b x="y"/></a>
module REXML
module Light
# Represents a tagged XML element. Elements are characterized by
# having children, attributes, and names, and can themselves be
# children.
class Node
  NAMESPLIT = /^(?:(#{XMLTokens::NCNAME_STR}):)?(#{XMLTokens::NCNAME_STR})/u
  # Node types that #size treats as containers.
  PARENTS = [ :element, :document, :doctype ]

  # NOTE(review): most methods below call at, push, _old_get and _old_put,
  # none of which is defined in this class.  Presumably they are expected
  # to come from an Array-like superclass or mixin -- confirm before relying
  # on this class.

  # Create a new element.
  # A String becomes a :text node, nil becomes an empty :document node, and
  # a parser event array has its :start_* type renamed in place.
  def initialize node=nil
    if node.kind_of? String
      node = [ :text, node ]
    elsif node.nil?
      node = [ :document, nil, nil ]
    elsif node[0] == :start_element
      node[0] = :element
    elsif node[0] == :start_doctype
      node[0] = :doctype
    elsif node[0] == :start_document
      node[0] = :document
    end
    # Store the normalized array; assigning before the normalization would
    # keep the raw String (or nil) instead.
    @node = node
  end

  # NOTE(review): this reads the children from the last slot of @node,
  # while #each and #[] read them from index 4 onwards -- confirm which
  # layout is intended.
  def size
    if PARENTS.include? @node[0]
      @node[-1].size
    else
      0
    end
  end

  def each( &block )
    size.times { |x| yield( at(x+4) ) }
  end

  def name
    at(2)
  end

  def name=( name_str, ns=nil )
    pfx = ''
    pfx = "#{prefix(ns)}:" if ns
    _old_put(2, "#{pfx}#{name_str}")
  end

  def parent=( node )
    _old_put(1,node)
  end

  def local_name
    namesplit
    @name
  end

  def local_name=( name_str )
    # The name lives in slot 2 (see #name and #name=); slot 1 holds the
    # parent (see #parent and #parent=).
    _old_put( 2, "#@prefix:#{name_str}" )
  end

  def prefix( namespace=nil )
    prefix_of( self, namespace )
  end

  def namespace( prefix=prefix() )
    namespace_of( self, prefix )
  end

  def namespace=( namespace )
    @prefix = prefix( namespace )
    pfx = ''
    pfx = "#@prefix:" if @prefix.size > 0
    # Slot 2 is the name slot; slot 1 would overwrite the parent.
    _old_put(2, "#{pfx}#@name")
  end

  # String reference: attribute lookup (optionally prefixed for ns).
  # Range or Integer reference: child lookup, offset past the four header
  # slots.
  # NOTE(review): for a Range only the start is offset, not the end.
  def []( reference, ns=nil )
    if reference.kind_of? String
      pfx = ''
      pfx = "#{prefix(ns)}:" if ns
      at(3)["#{pfx}#{reference}"]
    elsif reference.kind_of? Range
      _old_get( Range.new(4+reference.begin, reference.end, reference.exclude_end?) )
    else
      _old_get( 4+reference )
    end
  end

  def =~( path )
    XPath.match( self, path )
  end

  # Doesn't handle namespaces yet
  def []=( reference, ns, value=nil )
    if reference.kind_of? String
      value = ns unless value
      at( 3 )[reference] = value
    elsif reference.kind_of? Range
      # Children start at index 4, as in #[] and #each.
      _old_put( Range.new(4+reference.begin, reference.end, reference.exclude_end?), ns )
    else
      if value
        _old_put( 4+reference, ns, value )
      else
        _old_put( 4+reference, ns )
      end
    end
  end

  # Append a child to this element, optionally under a provided namespace.
  # The namespace argument is ignored if the element argument is an Element
  # object.  Otherwise, the element argument is a string, the namespace (if
  # provided) is the namespace the element is created in.
  def << element
    if node_type() == :text
      at(-1) << element
    else
      newnode = Node.new( element )
      newnode.parent = self
      self.push( newnode )
    end
    at(-1)
  end

  def node_type
    _old_get(0)
  end

  # NOTE(review): normalizefoo is not defined anywhere in this class, so
  # this method raises NameError as written; the intent cannot be told from
  # here.
  def text=( foo )
    replace = at(4).kind_of?(String) ? 1 : 0
    self._old_put(4,replace, normalizefoo)
  end

  # Follows the parent links (slot 1) upwards and returns the topmost node.
  def root
    context = self
    context = context.at(1) while context.at(1)
    context
  end

  # NOTE(review): at(3) is used as the attribute hash by #[]; comparing it
  # with a name looks unintended -- confirm.
  def has_name?( name, namespace = '' )
    at(3) == name and namespace() == namespace
  end

  def children
    self
  end

  def parent
    at(1)
  end

  # NOTE(review): empty body, always returns nil.
  def to_s
  end

  private

  # Splits the stored name into @prefix and @name, once.
  def namesplit
    return if defined?(@name) and @name
    at(2) =~ NAMESPLIT
    @prefix = $1 || ''
    @name = $2
  end

  # NOTE(review): the node argument is unused, and the fallback recurses
  # with @node[0] (the node type) rather than a parent node -- confirm.
  def namespace_of( node, prefix=nil )
    if not prefix
      name = at(2)
      name =~ NAMESPLIT
      prefix = $1
    end
    to_find = 'xmlns'
    to_find = "xmlns:#{prefix}" if not prefix.nil?
    ns = at(3)[ to_find ]
    ns ? ns : namespace_of( @node[0], prefix )
  end

  # NOTE(review): with a namespace given, a hit returns the [key, value]
  # pair found in the attribute hash, not a bare prefix -- confirm.
  def prefix_of( node, namespace=nil )
    if not namespace
      name = node.name
      name =~ NAMESPLIT
      $1
    else
      ns = at(3).find { |k,v| v == namespace }
      ns ? ns : prefix_of( node.parent, namespace )
    end
  end
end
end
end

View file

@ -0,0 +1,47 @@
require 'rexml/xmltokens'
module REXML
# Adds named attributes to an object.
module Namespace
  # name is the local part of the assigned name; expanded_name is the name
  # exactly as it was assigned.  Both are only meaningful once name= has
  # been called.
  attr_reader :name, :expanded_name
  # The prefix of the assigned name ("" when it had none)
  attr_accessor :prefix
  include XMLTokens
  NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u

  # Sets the name and the expanded name, and derives the prefix.  A name
  # without a prefix also resets @namespace to "".
  def name=( name )
    @expanded_name = name
    name =~ NAMESPLIT
    found_prefix = $1
    found_local = $2
    if found_prefix
      @prefix = found_prefix
    else
      @prefix = ""
      @namespace = ""
    end
    @name = found_local
  end

  # Compares names optionally WITH namespaces: with ns given, both the
  # namespace and the local name must match; a name containing ":" is
  # compared against the fully expanded name; anything else against the
  # local name.
  def has_name?( other, ns=nil )
    return (namespace() == ns and name() == other) if ns
    return fully_expanded_name == other if other.include? ":"
    return name == other
  end

  alias :local_name :name

  # Fully expand the name, even if the prefix wasn't specified in the
  # source file.
  def fully_expanded_name
    pfx = prefix
    pfx.size > 0 ? "#{pfx}:#@name" : @name
  end
end
end

75
vendor/plugins/rexml/lib/rexml/node.rb vendored Normal file
View file

@ -0,0 +1,75 @@
require "rexml/parseexception"
require "rexml/formatters/pretty"
require "rexml/formatters/default"
module REXML
# Represents a node in the tree. Nodes are never encountered except as
# superclasses of other objects. Nodes have siblings.
module Node
  # @return the sibling that follows this node in its parent, or nil when
  # there is no parent or no following sibling
  def next_sibling_node
    return nil if @parent.nil?
    position = @parent.index(self)
    @parent[ position + 1 ]
  end

  # @return the sibling that precedes this node in its parent, or nil when
  # there is no parent or this node comes first
  def previous_sibling_node
    return nil if @parent.nil?
    position = @parent.index(self)
    position == 0 ? nil : @parent[ position - 1 ]
  end

  # indent::
  #   *DEPRECATED* Passing a value selects the Pretty formatter and emits a
  #   warning; without it the Default formatter is used.  See the
  #   REXML::Formatters package for changing the output style.
  def to_s indent=nil
    if indent.nil?
      formatter = REXML::Formatters::Default.new
    else
      Kernel.warn( "#{self.class.name}.to_s(indent) parameter is deprecated" )
      formatter = REXML::Formatters::Pretty.new( indent )
    end
    rv = ""
    formatter.write( self, rv )
    return rv
  end

  # Appends +ind+ copies of the indent string to +to+ (nothing when ind is
  # below 1).  The indent string comes from the parent's context entry
  # :indentstyle when one is set, otherwise the built-in default is used.
  def indent to, ind
    indentstyle = ' '
    if @parent and @parent.context and not @parent.context[:indentstyle].nil?
      indentstyle = @parent.context[:indentstyle]
    end
    to << indentstyle*ind unless ind<1
  end

  def parent?
    false
  end

  # Visit all subnodes of +self+ recursively
  def each_recursive(&block) # :yields: node
    self.elements.each do |node|
      block.call(node)
      node.each_recursive(&block)
    end
  end

  # Find (and return) first subnode (recursively) for which the block
  # evaluates to true. Returns +nil+ if none was found.
  def find_first_recursive(&block) # :yields: node
    each_recursive do |node|
      return node if block.call(node)
    end
    nil
  end

  # Returns the position that +self+ holds in its parent's array, indexed
  # from 1.
  def index_in_parent
    parent.index(self) + 1
  end
end
end

View file

@ -0,0 +1,24 @@
require 'rexml/encoding'
module REXML
class Output
  include Encoding
  attr_reader :encoding

  # Wraps real_IO.  Content appended with << is passed through encode
  # unless encd is UTF_8, in which case it is forwarded unchanged.
  def initialize real_IO, encd="iso-8859-1"
    @output = real_IO
    self.encoding = encd
    # true whenever the target encoding is something other than UTF_8
    @to_utf = (encd != UTF_8)
  end

  def <<( content )
    payload = @to_utf ? self.encode(content) : content
    @output << payload
  end

  def to_s
    "Output[#{encoding}]"
  end
end
end

166
vendor/plugins/rexml/lib/rexml/parent.rb vendored Normal file
View file

@ -0,0 +1,166 @@
require "rexml/child"
module REXML
# A parent has children, and has methods for accessing them. The Parent
# class is never encountered except as the superclass for some other
# object.
class Parent < Child
  include Enumerable

  # Constructor
  # @param parent if supplied, will be set as the parent of this object
  def initialize parent=nil
    super(parent)
    @children = []
  end

  # Appends object to the children and makes this parent its parent.
  # @return the added object
  def add( object )
    object.parent = self
    @children << object
    object
  end

  alias :push :add
  alias :<< :push

  # Prepends object to the children and makes this parent its parent.
  def unshift( object )
    object.parent = self
    @children.unshift object
  end

  # Removes every occurrence of exactly this object (identity, not ==)
  # from the children and clears its parent.
  # @return the removed object, or nil if it was not a child
  def delete( object )
    found = false
    @children.delete_if {|c| c.equal?(object) and found = true }
    return nil unless found
    object.parent = nil
    object
  end

  def each(&block)
    @children.each(&block)
  end

  # Removes the children for which the block is true.  The parent of a
  # removed child is not reset here.
  def delete_if( &block )
    @children.delete_if(&block)
  end

  # Removes the child at index.  The parent of the removed child is not
  # reset here.
  def delete_at( index )
    @children.delete_at index
  end

  def each_index( &block )
    @children.each_index(&block)
  end

  # Fetches a child at a given index
  # @param index the Integer index of the child to fetch
  def []( index )
    @children[index]
  end

  alias :each_child :each

  # Set an index entry.  See Array.[]=
  # @param index the index of the element to set
  # @param opt either the object to set, or an Integer length
  # @param child if opt is an Integer, this is the child to set
  # @return the child that was set (the last argument)
  def []=( *args )
    args[-1].parent = self
    @children[*args[0..-2]] = args[-1]
  end

  # Inserts an child before another child
  # @param child1 this is either an xpath or an Element.  If an Element,
  # child2 will be inserted before child1 in the child list of the parent.
  # If an xpath, child2 will be inserted before the first child to match
  # the xpath.
  # @param child2 the child to insert
  # @return the parent (self)
  def insert_before( child1, child2 )
    if child1.kind_of? String
      child1 = XPath.first( self, child1 )
      child1.parent.insert_before child1, child2
    else
      ind = index(child1)
      child2.parent.delete(child2) if child2.parent
      @children[ind,0] = child2
      child2.parent = self
    end
    self
  end

  # Inserts an child after another child
  # @param child1 this is either an xpath or an Element.  If an Element,
  # child2 will be inserted after child1 in the child list of the parent.
  # If an xpath, child2 will be inserted after the first child to match
  # the xpath.
  # @param child2 the child to insert
  # @return the parent (self)
  def insert_after( child1, child2 )
    if child1.kind_of? String
      child1 = XPath.first( self, child1 )
      child1.parent.insert_after child1, child2
    else
      ind = index(child1)+1
      child2.parent.delete(child2) if child2.parent
      @children[ind,0] = child2
      child2.parent = self
    end
    self
  end

  # @return a copy of the child list
  def to_a
    @children.dup
  end

  # Fetches the index of a given child
  # @param child the child to get the index of
  # @return the index of the child, or nil if the object is not a child
  # of this parent.
  def index( child )
    # Children are matched by comparing their hash values.
    @children.each_with_index do |candidate, position|
      return position if candidate.hash == child.hash
    end
    nil
  end

  # @return the number of children of this parent
  def size
    @children.size
  end

  alias :length :size

  # Replaces one child with another, making sure the nodelist is correct
  # @param to_replace the child to replace (must be a Child)
  # @param replacement the child to insert into the nodelist (must be a
  # Child)
  def replace_child( to_replace, replacement )
    @children.map! {|c| c.equal?( to_replace ) ? replacement : c }
    to_replace.parent = nil
    replacement.parent = self
  end

  # Deeply clones this object.  This creates a complete duplicate of this
  # Parent, including all descendants.
  def deep_clone
    cl = clone()
    each do |child|
      if child.kind_of? Parent
        cl << child.deep_clone
      else
        cl << child.clone
      end
    end
    cl
  end

  alias :children :to_a

  def parent?
    true
  end
end
end

View file

@ -0,0 +1,51 @@
module REXML
# Raised for parse failures.  Optionally carries the source being parsed,
# the parser, and an underlying exception that triggered the failure.
class ParseException < RuntimeError
  attr_accessor :source, :parser, :continued_exception

  # @param message the error message
  # @param source the source being parsed, if any; used by #to_s, #line,
  # #position and #context
  # @param parser the parser that raised the error, if any
  # @param exception an underlying exception to quote in #to_s, if any
  def initialize( message, source=nil, parser=nil, exception=nil )
    super(message)
    @source = source
    @parser = parser
    @continued_exception = exception
  end

  # @return the message, preceded by a description of the underlying
  # exception (if any) and followed by line, position and the start of the
  # unconsumed buffer (if a source is set)
  def to_s
    err = ""
    # Quote the original exception, if there was one
    if @continued_exception
      err << @continued_exception.inspect
      err << "\n"
      # backtrace is nil for an exception that was never raised
      trace = @continued_exception.backtrace
      err << trace.join("\n") if trace
      err << "\n...\n"
    end
    # Get the stack trace and error message
    err << super
    # Add contextual information
    if @source
      err << "\nLine: #{line}\n"
      err << "Position: #{position}\n"
      err << "Last 80 unconsumed characters:\n"
      # [0, 80] is exactly 80 characters; [0..80] would be 81
      err << @source.buffer[0, 80].gsub(/\n/, ' ')
    end
    err
  end

  # @return element 0 of the source's current_line, or nil if unavailable
  def position
    info = current_line_info
    info[0] if info
  end

  # @return element 2 of the source's current_line, or nil if unavailable
  def line
    info = current_line_info
    info[2] if info
  end

  # @return the source's current_line, or nil if unavailable
  def context
    current_line_info
  end

  private

  # The source's current_line, or nil when there is no source or the
  # source does not provide current_line.
  def current_line_info
    return nil unless @source and defined?(@source.current_line)
    @source.current_line
  end
end
end

View file

@ -0,0 +1,530 @@
require 'rexml/parseexception'
require 'rexml/undefinednamespaceexception'
require 'rexml/source'
require 'set'
module REXML
module Parsers
# = Using the Pull Parser
# <em>This API is experimental, and subject to change.</em>
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
# while parser.has_next?
# res = parser.next
# puts res[1]['att'] if res.start_tag? and res[0] == 'b'
# end
# See the PullEvent class for information on the content of the results.
# The data is identical to the arguments passed for the various events to
# the StreamListener API.
#
# Notice that:
# parser = PullParser.new( "<a>BAD DOCUMENT" )
# while parser.has_next?
# res = parser.next
# raise res[1] if res.error?
# end
#
# Nat Price gave me some good ideas for the API.
class BaseParser
# --- Name building blocks --------------------------------------------
if String.method_defined? :encode
  # Oniguruma / POSIX [understands unicode]
  LETTER = '[[:alpha:]]'
  DIGIT = '[[:digit:]]'
else
  # Ruby < 1.9 [doesn't understand unicode]
  LETTER = 'a-zA-Z'
  DIGIT = '\d'
end

COMBININGCHAR = '' # TODO
EXTENDER = '' # TODO

NCNAME_STR= "[#{LETTER}_:][-#{LETTER}#{DIGIT}._:#{COMBININGCHAR}#{EXTENDER}]*"
NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})"
UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"

NAMECHAR = '[\-\w\d\.:]'
NAME = "([\\w:]#{NAMECHAR}*)"
NMTOKEN = "(?:#{NAMECHAR})+"
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)"
REFERENCE_RE = /#{REFERENCE}/

# --- Document-level markup -------------------------------------------
DOCTYPE_START = /\A\s*<!DOCTYPE\s/um
DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um
ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\4/um
COMMENT_START = /\A<!--/u
COMMENT_PATTERN = /<!--(.*?)-->/um
CDATA_START = /\A<!\[CDATA\[/u
CDATA_END = /^\s*\]\s*>/um
CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
XMLDECL_START = /\A<\?xml\s/u;
XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um
INSTRUCTION_START = /\A<\?/u
INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um
TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{UNAME_STR}\s*=\s*(["']).*?\5)*)\s*(\/)?>/um
CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um

# --- Pseudo-attributes of the XML declaration ------------------------
VERSION = /\bversion\s*=\s*["'](.*?)['"]/um
ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um
# Whitespace after "=" is optional here, exactly as for VERSION and
# ENCODING; a bare \s would demand one whitespace character and so would
# never match the usual standalone="yes".
STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um

# --- DTD declarations ------------------------------------------------
ENTITY_START = /^\s*<!ENTITY/
IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u
ELEMENTDECL_START = /^\s*<!ELEMENT/um
ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
SYSTEMENTITY = /^\s*(%.*?;)\s*$/um
ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"
ATTDEF_RE = /#{ATTDEF}/
ATTLISTDECL_START = /^\s*<!ATTLIST/um
ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
NOTATIONDECL_START = /^\s*<!NOTATION/um
PUBLIC = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
SYSTEM = /^\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um

TEXT_PATTERN = /\A([^<]*)/um

# Entity constants
PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9\\-()+,./:=?;!*@$_%#"
SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
NDATADECL = "\\s+NDATA\\s+#{NAME}"
PEREFERENCE = "%#{NAME};"
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um

# An ampersand that does not start a named entity reference
EREFERENCE = /&(?!#{NAME};)/

# name => [ pattern matching the reference, the reference text,
#           the character it stands for, pattern matching that character ]
DEFAULT_ENTITIES = {
  'gt' => [/&gt;/, '&gt;', '>', />/],
  'lt' => [/&lt;/, '&lt;', '<', /</],
  'quot' => [/&quot;/, '&quot;', '"', /"/],
  "apos" => [/&apos;/, "&apos;", "'", /'/]
}

######################################################################
# These are patterns to identify common markup errors, to make the
# error messages more informative.
######################################################################
MISSING_ATTRIBUTE_QUOTES = /^<#{NAME_STR}\s+#{NAME_STR}\s*=\s*[^"']/um
# Creates a parser reading from source; all set-up is done by stream=.
def initialize( source )
  self.stream = source
end
# Registers a listener.  On the first registration, pull is wrapped on this
# instance so that every pulled event is also handed to each listener's
# receive method before being returned.
def add_listener( listener )
  unless defined?(@listeners) and @listeners
    @listeners = []
    instance_eval <<-EOL
alias :_old_pull :pull
def pull
event = _old_pull
@listeners.each do |listener|
listener.receive event
end
event
end
EOL
  end
  @listeners << listener
end
attr_reader :source

# Points the parser at a new input (wrapped by SourceFactory.create_from)
# and resets all parsing state.
def stream=( source )
  @source = SourceFactory.create_from( source )
  @closed = @document_status = nil
  @tags, @stack, @entities, @nsstack = [], [], [], []
end
# The source's position when the source provides one, otherwise 0.
def position
  # FIXME: 0 is only a placeholder for sources without a position
  return 0 unless @source.respond_to? :position
  @source.position
end
# Returns true if there are no more events
def empty?
  # Nothing left to read and nothing pushed back
  @source.empty? && @stack.empty?
end
# Returns true if there are more events. Synonymous with !empty?
def has_next?
  # Either unread input or a pushed-back event is enough
  !@source.empty? || !@stack.empty?
end
# Push an event back on the head of the stream. This method
# has (theoretically) infinite depth.
def unshift token
  # The pushed-back event becomes the next one returned from the stack
  @stack.unshift( token )
end
# Peek at the +depth+ event in the stack. The first element on the stack
# is at depth 0. If +depth+ is -1, will parse to the end of the input
# stream and return the last event, which is always :end_document.
# Be aware that this causes the stream to be parsed up to the +depth+
# event, so you can effectively pre-parse the entire document (pull the
# entire thing into memory) using this method.
def peek depth=0
  raise %Q[Illegal argument "#{depth}"] if depth < -1
  pulled = []
  if depth == -1
    # read everything that is left
    pulled.push(pull()) until empty?
  else
    # read only as many events as are missing to reach depth
    needed = depth + 1
    pulled.push(pull()) while @stack.size + pulled.size < needed
  end
  @stack += pulled if pulled.size > 0
  @stack[depth]
end
# Returns the next event. This is a +PullEvent+ object.
# Returns the next event as an Array whose first element is the event type.
#
# Order of processing:
# 1. a pending :end_element for a self-closed tag
# 2. :end_document when nothing is left, then any pushed-back event
# 3. the document prolog (while @document_status is nil)
# 4. declarations inside the doctype (while @document_status is
#    :in_doctype)
# 5. ordinary content: tags, comments, CDATA, instructions and text
#
# Failures in step 5 other than ParseException (and its
# UndefinedNamespaceException subclass) are wrapped in a ParseException.
def pull
  if @closed
    x, @closed = @closed, nil
    return [ :end_element, x ]
  end
  return [ :end_document ] if empty?
  return @stack.shift if @stack.size > 0
  #STDERR.puts @source.encoding
  @source.read if @source.buffer.size<2
  #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
  if @document_status == nil
    #@source.consume( /^\s*/um )
    word = @source.match( /^((?:\s+)|(?:<[^>]*>))/um )
    word = word[1] unless word.nil?
    #STDERR.puts "WORD = #{word.inspect}"
    case word
    when COMMENT_START
      return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
    when XMLDECL_START
      #STDERR.puts "XMLDECL"
      results = @source.match( XMLDECL_PATTERN, true )[1]
      version = VERSION.match( results )
      version = version[1] unless version.nil?
      encoding = ENCODING.match(results)
      encoding = encoding[1] unless encoding.nil?
      @source.encoding = encoding
      standalone = STANDALONE.match(results)
      standalone = standalone[1] unless standalone.nil?
      return [ :xmldecl, version, encoding, standalone ]
    when INSTRUCTION_START
      return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
    when DOCTYPE_START
      md = @source.match( DOCTYPE_PATTERN, true )
      @nsstack.unshift(curr_ns=Set.new)
      identity = md[1]
      close = md[2]
      identity =~ IDENTITY
      name = $1
      raise REXML::ParseException.new("DOCTYPE is missing a name") if name.nil?
      pub_sys = $2.nil? ? nil : $2.strip
      long_name = $4.nil? ? nil : $4.strip
      uri = $6.nil? ? nil : $6.strip
      args = [ :start_doctype, name, pub_sys, long_name, uri ]
      if close == ">"
        # No internal subset: the doctype ends right away
        @document_status = :after_doctype
        @source.read if @source.buffer.size<2
        md = @source.match(/^\s*/um, true)
        @stack << [ :end_doctype ]
      else
        @document_status = :in_doctype
      end
      return args
    when /^\s+/
    else
      @document_status = :after_doctype
      @source.read if @source.buffer.size<2
      md = @source.match(/\s*/um, true)
      if @source.encoding == "UTF-8"
        if @source.buffer.respond_to? :force_encoding
          @source.buffer.force_encoding(Encoding::UTF_8)
        end
      end
    end
  end
  if @document_status == :in_doctype
    md = @source.match(/\s*(.*?>)/um)
    case md[1]
    when SYSTEMENTITY
      match = @source.match( SYSTEMENTITY, true )[1]
      return [ :externalentity, match ]
    when ELEMENTDECL_START
      return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
    when ENTITY_START
      match = @source.match( ENTITYDECL, true ).to_a.compact
      match[0] = :entitydecl
      ref = false
      if match[1] == '%'
        ref = true
        match.delete_at 1
      end
      # Now we have to sort out what kind of entity reference this is
      if match[2] == 'SYSTEM'
        # External reference
        match[3] = match[3][1..-2] # PUBID
        match.delete_at(4) if match.size > 4 # Chop out NDATA decl
        # match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
      elsif match[2] == 'PUBLIC'
        # External reference
        match[3] = match[3][1..-2] # PUBID
        match[4] = match[4][1..-2] # HREF
        # match is [ :entity, name, PUBLIC, pubid, href ]
      else
        match[2] = match[2][1..-2]
        match.pop if match.size == 4
        # match is [ :entity, name, value ]
      end
      match << '%' if ref
      return match
    when ATTLISTDECL_START
      md = @source.match( ATTLISTDECL_PATTERN, true )
      raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
      element = md[1]
      contents = md[0]
      pairs = {}
      values = md[0].scan( ATTDEF_RE )
      values.each do |attdef|
        unless attdef[3] == "#IMPLIED"
          attdef.compact!
          val = attdef[3]
          val = attdef[4] if val == "#FIXED "
          pairs[attdef[0]] = val
          if attdef[0] =~ /^xmlns:(.*)/
            @nsstack[0] << $1
          end
        end
      end
      return [ :attlistdecl, element, pairs, contents ]
    when NOTATIONDECL_START
      md = nil
      if @source.match( PUBLIC )
        md = @source.match( PUBLIC, true )
        vals = [md[1],md[2],md[4],md[6]]
      elsif @source.match( SYSTEM )
        md = @source.match( SYSTEM, true )
        vals = [md[1],md[2],nil,md[4]]
      else
        raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
      end
      return [ :notationdecl, *vals ]
    when CDATA_END
      @document_status = :after_doctype
      @source.match( CDATA_END, true )
      return [ :end_doctype ]
    end
  end
  begin
    if @source.buffer[0] == ?<
      if @source.buffer[1] == ?/
        @nsstack.shift
        last_tag = @tags.pop
        #md = @source.match_to_consume( '>', CLOSE_MATCH)
        md = @source.match( CLOSE_MATCH, true )
        # md is nil when the input at this point is not a well-formed end
        # tag; report that as a missing end tag as well.
        if md.nil? or last_tag != md[1]
          message = "Missing end tag for '#{last_tag}'"
          message << " (got \"#{md[1]}\")" if md
          raise REXML::ParseException.new( message, @source )
        end
        return [ :end_element, last_tag ]
      elsif @source.buffer[1] == ?!
        md = @source.match(/\A(\s*[^>]*>)/um)
        #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
        raise REXML::ParseException.new("Malformed node", @source) unless md
        if md[0][2] == ?-
          md = @source.match( COMMENT_PATTERN, true )
          if md
            # "--" inside a comment, or a comment ending in "-", is not
            # allowed.  \z anchors at the end of the comment; $ would also
            # match before every line break inside it.
            case md[1]
            when /--/, /-\z/
              raise REXML::ParseException.new("Malformed comment", @source)
            end
            return [ :comment, md[1] ]
          end
        else
          md = @source.match( CDATA_PATTERN, true )
          return [ :cdata, md[1] ] if md
        end
        raise REXML::ParseException.new( "Declarations can only occur "+
          "in the doctype declaration.", @source)
      elsif @source.buffer[1] == ??
        md = @source.match( INSTRUCTION_PATTERN, true )
        return [ :processing_instruction, md[1], md[2] ] if md
        raise REXML::ParseException.new( "Bad instruction declaration",
          @source)
      else
        # Get the next tag
        md = @source.match(TAG_MATCH, true)
        unless md
          # Check for missing attribute quotes
          raise REXML::ParseException.new("missing attribute quote", @source) if @source.match(MISSING_ATTRIBUTE_QUOTES )
          raise REXML::ParseException.new("malformed XML: missing tag start", @source)
        end
        attributes = {}
        prefixes = Set.new
        prefixes << md[2] if md[2]
        @nsstack.unshift(curr_ns=Set.new)
        if md[4].size > 0
          attrs = md[4].scan( ATTRIBUTE_PATTERN )
          raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
          attrs.each { |a,b,c,d,e|
            if b == "xmlns"
              if c == "xml"
                if d != "http://www.w3.org/XML/1998/namespace"
                  msg = "The 'xml' prefix must not be bound to any other namespace "+
                  "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
                  raise REXML::ParseException.new( msg, @source, self )
                end
              elsif c == "xmlns"
                msg = "The 'xmlns' prefix must not be declared "+
                "(http://www.w3.org/TR/REC-xml-names/#ns-decl)"
                raise REXML::ParseException.new( msg, @source, self)
              end
              curr_ns << c
            elsif b
              prefixes << b unless b == "xml"
            end

            if attributes.has_key? a
              msg = "Duplicate attribute #{a.inspect}"
              raise REXML::ParseException.new( msg, @source, self)
            end

            attributes[a] = e
          }
        end

        # Verify that all of the prefixes have been defined
        for prefix in prefixes
          unless @nsstack.find{|k| k.member?(prefix)}
            raise UndefinedNamespaceException.new(prefix,@source,self)
          end
        end

        if md[6]
          # Self-closing tag: the matching :end_element is delivered by the
          # next call
          @closed = md[1]
          @nsstack.shift
        else
          @tags.push( md[1] )
        end
        return [ :start_element, md[1], attributes ]
      end
    else
      md = @source.match( TEXT_PATTERN, true )
      if md[0].length == 0
        @source.match( /(\s+)/, true )
      end
      #STDERR.puts "GOT #{md[1].inspect}" unless md[0].length == 0
      #return [ :text, "" ] if md[0].length == 0
      # unnormalized = Text::unnormalize( md[1], self )
      # return PullEvent.new( :text, md[1], unnormalized )
      return [ :text, md[1] ]
    end
  rescue REXML::UndefinedNamespaceException
    raise
  rescue REXML::ParseException
    raise
  rescue => error
    # Only StandardError is wrapped; interrupts, exit requests and other
    # non-standard exceptions pass through untouched.
    raise REXML::ParseException.new( "Exception parsing",
      @source, self, error )
  end
  return [ :dummy ]
end
# Looks up the replacement text for an entity name: first in the supplied
# entities, then among DEFAULT_ENTITIES.  The value found is passed through
# unnormalize; the result is nil when the name is unknown.
def entity( reference, entities )
  value = entities ? entities[ reference ] : nil
  unless value
    builtin = DEFAULT_ENTITIES[ reference ]
    value = builtin[2] if builtin
  end
  value ? unnormalize( value, entities ) : nil
end
# Escapes all possible entities
# Escapes all possible entities
#
# Works on a copy of input: bare ampersands become &amp;, every value of
# the entities table is replaced by a reference to its key, and finally the
# characters covered by DEFAULT_ENTITIES are replaced by their references.
# @param input the String to escape (not modified)
# @param entities optional table of entity name => replacement text
# @param entity_filter optional collection of entity names that must not be
# substituted
# @return the escaped copy
def normalize( input, entities=nil, entity_filter=nil )
  copy = input.clone
  # Doing it like this rather than in a loop improves the speed
  copy.gsub!( EREFERENCE, '&amp;' )
  entities.each do |key, value|
    # The filter holds entity names, so it is checked against the key
    copy.gsub!( value, "&#{key};" ) unless entity_filter and
      entity_filter.include?(key)
  end if entities
  copy.gsub!( EREFERENCE, '&amp;' )
  DEFAULT_ENTITIES.each do |key, value|
    copy.gsub!( value[3], value[1] )
  end
  copy
end
# Unescapes all possible entities
# Unescapes all possible entities
#
# Works on a copy of string: line ends are normalized to "\n", numeric
# character references are replaced by their characters, named references
# are replaced through entity (or DEFAULT_ENTITIES) unless listed in
# filter, and &amp; is resolved last.
def unnormalize( string, entities=nil, filter=nil )
  rv = string.clone
  rv.gsub!( /\r\n?/, "\n" )
  matches = rv.scan( REFERENCE_RE )
  return rv if matches.size == 0
  rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) do
    code = $1
    # "x41" becomes "0x41" so that Integer() reads it as hexadecimal
    code = "0#{code}" if code[0] == ?x
    [Integer(code)].pack('U*')
  end
  # Only named references carry a capture; numeric ones yield nil
  names = matches.collect { |groups| groups[0] }.compact
  if names.size > 0
    names.each do |entity_reference|
      next if filter and filter.include?(entity_reference)
      entity_value = entity( entity_reference, entities )
      if entity_value
        rv.gsub!( /&#{entity_reference};/, entity_value )
      else
        er = DEFAULT_ENTITIES[entity_reference]
        rv.gsub!( er[0], er[2] ) if er
      end
    end
    rv.gsub!( /&amp;/, '&' )
  end
  rv
end
end
end
end
=begin
case event[0]
when :start_element
when :text
when :end_element
when :processing_instruction
when :cdata
when :comment
when :xmldecl
when :start_doctype
when :end_doctype
when :externalentity
when :elementdecl
when :entity
when :attlistdecl
when :notationdecl
when :end_doctype
end
=end

View file

@ -0,0 +1,60 @@
require 'rexml/parsers/streamparser'
require 'rexml/parsers/baseparser'
require 'rexml/light/node'
module REXML
module Parsers
# Builds a tree of nested arrays from the events of a BaseParser.
class LightParser
  def initialize stream
    @stream = stream
    @parser = REXML::Parsers::BaseParser.new( stream )
  end

  def add_listener( listener )
    @parser.add_listener( listener )
  end

  # Rewinds the stream and points the parser at it again.
  def rewind
    @stream.rewind
    @parser.stream = @stream
  end

  # Pulls events until :end_document and returns the root array.  Every
  # event array other than the end markers is appended to the current
  # context and gets that context inserted at index 1.  Start events then
  # become the new context; end events step back to the parent stored at
  # index 1.
  def parse
    root = context = [ :document ]
    while true
      event = @parser.pull
      case event[0]
      when :end_document
        break
      when :end_element, :end_doctype
        context = context[1]
      else
        context << event
        event[1,0] = [context]
        if event[0] == :start_element or event[0] == :start_doctype
          context = event
        end
      end
    end
    root
  end
end
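# LightParser#parse returns nested arrays built from the BaseParser events.
# Every node array has its parent inserted at index 1:
# 0 The event type (:start_element, :text, :comment, ...)
# 1 The parent node
# 2.. The remaining event data
# Elements are [ :start_element, parent, tag name, attribute Hash, children* ]
# Text is [ :text, parent, string ]
# PIs are [ :processing_instruction, parent, target, data ]
# Comments are [ :comment, parent, data ]
# DocTypes are [ :start_doctype, parent, name, pub_sys, long_name, uri, children* ]
# The root is [ :document, children* ]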
end
end

View file

@ -0,0 +1,196 @@
require 'forwardable'
require 'rexml/parseexception'
require 'rexml/parsers/baseparser'
require 'rexml/xmltokens'
module REXML
module Parsers
# = Using the Pull Parser
# <em>This API is experimental, and subject to change.</em>
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
# while parser.has_next?
# res = parser.next
# puts res[1]['att'] if res.start_tag? and res[0] == 'b'
# end
# See the PullEvent class for information on the content of the results.
# The data is identical to the arguments passed for the various events to
# the StreamListener API.
#
# Notice that:
# parser = PullParser.new( "<a>BAD DOCUMENT" )
# while parser.has_next?
# res = parser.next
# raise res[1] if res.error?
# end
#
# Nat Price gave me some good ideas for the API.
class PullParser
  include XMLTokens
  extend Forwardable

  # These are answered directly by the wrapped BaseParser
  def_delegators( :@parser, :has_next?, :entity, :empty?, :source )

  def initialize stream
    @entities = {}
    @listeners = nil
    @parser = BaseParser.new( stream )
    @my_stack = []
  end

  # Remembers the listener; nothing in this class calls it.
  def add_listener( listener )
    @listeners ||= []
    @listeners << listener
  end

  # Yields each remaining event, consuming it.
  def each
    yield self.pull while has_next?
  end

  # Returns the event +depth+ positions ahead without consuming it.  Events
  # that had to be read for this are kept for later pull calls.
  def peek depth=0
    missing = depth - @my_stack.length + 1
    missing.times do
      @my_stack.push( PullEvent.new(@parser.pull) )
    end
    @my_stack[depth]
  end

  # Returns the next event as a PullEvent.  Entity declarations that do not
  # mention PUBLIC or SYSTEM are recorded, and text events get their
  # unnormalized text appended.
  def pull
    return @my_stack.shift if @my_stack.length > 0

    event = @parser.pull
    case event[0]
    when :entitydecl
      @entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/
    when :text
      event << @parser.unnormalize( event[1], @entities )
    end
    PullEvent.new( event )
  end

  # Pushes an event back so that it is returned by the next pull.
  def unshift token
    @my_stack.unshift token
  end
end
# A parsing event. The contents of the event are accessed as an +Array?,
# and the type is given either by the ...? methods, or by accessing the
# +type+ accessor. The contents of this object vary from event to event,
# but are identical to the arguments passed to +StreamListener+s for each
# event.
class PullEvent
  # Wraps a parser event array.  Element 0 of the array is the event type;
  # it is reported by #event_type and the ...? predicates.  Everything
  # after it is the event content, which #[] addresses starting at index 0.
  def initialize(arg)
    @contents = arg
  end

  # Array-style access to the event content (the type is skipped).
  # @param start an Integer index or a Range of indexes into the content
  # @param endd with an Integer start, the number of items to return
  # @return the item, or an Array of items for a Range or a start/length
  # pair
  # @raise RuntimeError if start is neither a Range nor a Numeric
  def []( start, endd=nil)
    if start.kind_of? Range
      # Both ends move past the type slot.  A negative end counts from the
      # back of the array and needs no shift.
      first = start.begin + 1
      last = start.end
      last += 1 if last and last >= 0
      @contents.slice( Range.new( first, last, start.exclude_end? ) )
    elsif start.kind_of? Numeric
      if endd.nil?
        @contents.slice( start+1 )
      else
        @contents.slice( start+1, endd )
      end
    else
      raise "Illegal argument #{start.inspect} (#{start.class})"
    end
  end

  # The type of this event (element 0 of the wrapped array)
  def event_type
    @contents[0]
  end

  # Content: [ String tag_name, Hash attributes ]
  def start_element?
    @contents[0] == :start_element
  end

  # Content: [ String tag_name ]
  def end_element?
    @contents[0] == :end_element
  end

  # Content: [ String raw_text, String unnormalized_text ]
  def text?
    @contents[0] == :text
  end

  # Content: [ String text ]
  def instruction?
    @contents[0] == :processing_instruction
  end

  # Content: [ String text ]
  def comment?
    @contents[0] == :comment
  end

  # Content: [ String name, String pub_sys, String long_name, String uri ]
  def doctype?
    @contents[0] == :start_doctype
  end

  # Content: [ String text ]
  def attlistdecl?
    @contents[0] == :attlistdecl
  end

  # Content: [ String text ]
  def elementdecl?
    @contents[0] == :elementdecl
  end

  # Due to the wonders of DTDs, an entity declaration can be just about
  # anything.  There's no way to normalize it; you'll have to interpret the
  # content yourself.  However, the following is true:
  #
  # * If the entity declaration is an internal entity:
  #   [ String name, String value ]
  # Content: [ String text ]
  def entitydecl?
    @contents[0] == :entitydecl
  end

  # Content: [ String text ]
  def notationdecl?
    @contents[0] == :notationdecl
  end

  # Content: [ String text ]
  def entity?
    @contents[0] == :entity
  end

  # Content: [ String text ]
  def cdata?
    @contents[0] == :cdata
  end

  # Content: [ String version, String encoding, String standalone ]
  def xmldecl?
    @contents[0] == :xmldecl
  end

  def error?
    @contents[0] == :error
  end

  # "type: [content...]"
  def inspect
    @contents[0].to_s + ": " + @contents[1..-1].inspect
  end
end
end
end

View file

@ -0,0 +1,247 @@
require 'rexml/parsers/baseparser'
require 'rexml/parseexception'
require 'rexml/namespace'
require 'rexml/text'
module REXML
module Parsers
# SAX2Parser
class SAX2Parser
# Sets up empty observer lists and bookkeeping stacks, and wraps +source+
# in a BaseParser that will supply the raw events.
def initialize source
  @listeners, @procs = [], []
  @tag_stack, @namespace_stack = [], []
  @entities = {}
  @has_listeners = false
  @parser = BaseParser.new(source)
end
# The source object held by the underlying parser.
def source
  underlying = @parser
  underlying.source
end
# Hands +listener+ to the underlying parser's own add_listener.
def add_listener( listener )
  underlying = @parser
  underlying.add_listener( listener )
end
# Registers an observer.  Accepted argument shapes:
#
# Symbol, Array, Block
#   Listen to Symbol events on Array elements
# Symbol, Block
#   Listen to Symbol events
# Array, Listener
#   Listen to all events on Array elements
# Array, Block
#   Listen to :start_element events on Array elements
# Listener
#   Listen to All events
#
# Symbol can be one of: :start_element, :end_element,
# :start_prefix_mapping, :end_prefix_mapping, :characters,
# :processing_instruction, :doctype, :attlistdecl, :elementdecl,
# :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
#
# There is an additional symbol that can be listened for: :progress.
# This will be called for every event generated, passing in the current
# stream position.
#
# Array contains regular expressions or strings which will be matched
# against fully qualified element names.
#
# Listener must implement the methods in SAX2Listener
#
# Block will be passed the same arguments as a SAX2Listener method would
# be, where the method name is the same as the matched Symbol.
# See the SAX2Listener for more information.
def listen( *args, &blok )
  first, second = args[0], args[1]
  two_args = (args.size == 2)
  case first
  when Symbol
    if two_args
      # Symbol + Array: one proc entry per element matcher.
      second.each { |matcher| @procs << [first, matcher, blok] }
    else
      add( [first, nil, blok] )
    end
  when Array
    if two_args
      first.each { |matcher| add( [nil, matcher, second] ) }
    else
      first.each { |matcher| add( [ :start_element, matcher, blok ] ) }
    end
  else
    add([nil, nil, first])
  end
end
# Removes an observer.  With a +listener+ argument every entry registered
# for that listener object is dropped; otherwise every entry registered
# for the given block is dropped.
def deafen( listener=nil, &blok )
  return @procs.delete_if { |entry| entry.last == blok } unless listener
  @listeners.delete_if { |entry| entry.last == listener }
  @has_listeners = false if @listeners.empty?
end
# Pulls events from the underlying parser until :end_document and
# dispatches each one to the registered procs and listeners.  A :progress
# notification carrying the parser position follows every event except
# :end_document.
def parse
  @procs.each { |sym,match,block| block.call if sym == :start_document }
  @listeners.each { |sym,match,block|
    block.start_document if sym == :start_document or sym.nil?
  }
  while true
    event = @parser.pull
    case event[0]
    when :end_document
      handle( :end_document )
      break
    when :start_doctype
      handle( :doctype, *event[1..-1])
    when :end_doctype
      # Nothing is dispatched for the end of the DOCTYPE; only the
      # :progress notification below fires.
    when :start_element
      @tag_stack.push(event[1])
      # find the observers for namespaces
      procs = get_procs( :start_prefix_mapping, event[1] )
      listeners = get_listeners( :start_prefix_mapping, event[1] )
      if procs or listeners
        # break out the namespace declarations
        # The attributes live in event[2]
        event[2].each {|n, v| event[2][n] = @parser.normalize(v)}
        nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ }
        # "xmlns:foo" -> "foo"; a bare "xmlns" yields nil (default namespace)
        nsdecl.collect! { |n, value| [ n[6..-1], value ] }
        @namespace_stack.push({})
        nsdecl.each do |n,v|
          @namespace_stack[-1][n] = v
          # notify observers of namespaces
          procs.each { |ob| ob.call( n, v ) } if procs
          listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners
        end
      end
      event[1] =~ Namespace::NAMESPLIT
      prefix = $1
      local = $2
      uri = get_namespace(prefix)
      # find the observers for start_element
      procs = get_procs( :start_element, event[1] )
      listeners = get_listeners( :start_element, event[1] )
      # notify observers
      procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs
      listeners.each { |ob|
        ob.start_element( uri, local, event[1], event[2] )
      } if listeners
    when :end_element
      @tag_stack.pop
      event[1] =~ Namespace::NAMESPLIT
      prefix = $1
      local = $2
      uri = get_namespace(prefix)
      # find the observers for end_element
      procs = get_procs( :end_element, event[1] )
      listeners = get_listeners( :end_element, event[1] )
      # notify observers
      procs.each { |ob| ob.call( uri, local, event[1] ) } if procs
      listeners.each { |ob|
        ob.end_element( uri, local, event[1] )
      } if listeners
      namespace_mapping = @namespace_stack.pop
      # find the observers for namespaces
      procs = get_procs( :end_prefix_mapping, event[1] )
      listeners = get_listeners( :end_prefix_mapping, event[1] )
      if procs or listeners
        namespace_mapping.each do |ns_prefix, ns_uri|
          # notify observers of namespaces
          procs.each { |ob| ob.call( ns_prefix ) } if procs
          listeners.each { |ob| ob.end_prefix_mapping(ns_prefix) } if listeners
        end
      end
    when :text
      #normalized = @parser.normalize( event[1] )
      #handle( :characters, normalized )
      copy = event[1].clone
      # Expands references to entities declared in this document; the
      # replacement text is expanded recursively.
      esub = proc { |match|
        if @entities.has_key?($1)
          @entities[$1].gsub(Text::REFERENCE, &esub)
        else
          match
        end
      }
      copy.gsub!( Text::REFERENCE, &esub )
      # Numeric character references become the character itself.
      copy.gsub!( Text::NUMERICENTITY ) {|m|
        m=$1
        m = "0#{m}" if m[0] == ?x
        [Integer(m)].pack('U*')
      }
      handle( :characters, copy )
    when :entitydecl
      # Only [ :entitydecl, name, value ] declarations are remembered for
      # expansion in text.
      @entities[ event[1] ] = event[2] if event.size == 3
      handle( *event )
    when :processing_instruction, :comment, :attlistdecl,
      :elementdecl, :cdata, :notationdecl, :xmldecl
      handle( *event )
    end
    handle( :progress, @parser.position )
  end
end
private
# Dispatches +symbol+ with +arguments+ to every proc and listener that
# observes it for the element currently on top of the tag stack.  Procs
# are called with the arguments; listeners receive a method call named
# after the symbol.
def handle( symbol, *arguments )
  current_tag = @tag_stack.last
  callbacks = get_procs( symbol, current_tag )
  observers = get_listeners( symbol, current_tag )
  callbacks.each { |callback| callback.call( *arguments ) } if callbacks
  return unless observers
  observers.each { |observer| observer.send( symbol.to_s, *arguments ) }
end
# get_procs and get_listeners are deliberate duplicates of each other;
# the original author found that faster than sharing a helper.
#
# Returns the registered procs that observe +symbol+ for element +name+,
# or nil when no procs are registered at all.  An entry matches when its
# symbol is nil or equal to +symbol+, and its matcher is nil, equal to
# +name+, or a Regexp that matches +name+.
def get_procs( symbol, name )
  return nil if @procs.empty?
  selected = @procs.select { |sym, match, _block|
    next false unless sym.nil? || symbol == sym
    match.nil? || name == match || (match.kind_of?(Regexp) && name =~ match)
  }
  selected.map { |entry| entry.last }
end
# Returns the registered listener objects that observe +symbol+ for
# element +name+, or nil when no listeners are registered at all.  The
# matching rules are the same as in get_procs.
def get_listeners( symbol, name )
  return nil if @listeners.empty?
  selected = @listeners.select { |sym, match, _listener|
    next false unless sym.nil? || symbol == sym
    match.nil? || name == match || (match.kind_of?(Regexp) && name =~ match)
  }
  selected.map { |entry| entry.last }
end
# Files an observer entry ([symbol, matcher, observer]) under procs when
# the observer responds to +call+, otherwise under listeners.  An entry
# that is already present is not added again.
def add( pair )
  if pair.last.respond_to?( :call )
    @procs.push( pair ) unless @procs.include?( pair )
  else
    @listeners.push( pair ) unless @listeners.include?( pair )
    @has_listeners = true
  end
end
# Resolves +prefix+ (nil for the default namespace) against the
# namespace declarations currently in scope.  The innermost declaration
# wins; returns nil when the prefix is not declared.
def get_namespace( prefix )
  @namespace_stack.reverse_each do |scope|
    uri = scope[prefix]
    return uri unless uri.nil?
  end
  nil
end
end
end
end

View file

@ -0,0 +1,46 @@
module REXML
module Parsers
# Push-style parser: reads events from a BaseParser and calls the
# matching method on a single listener object.
class StreamParser
  # source::   whatever BaseParser.new accepts
  # listener:: the object that receives the callbacks
  def initialize source, listener
    @listener = listener
    @parser = BaseParser.new( source )
  end

  # Hands +listener+ to the underlying parser's own add_listener.
  def add_listener( listener )
    @parser.add_listener( listener )
  end

  # Runs the parse to completion, invoking one listener callback per
  # event.  Returns nil once :end_document is reached.
  def parse
    until (event = @parser.pull)[0] == :end_document
      kind = event[0]
      case kind
      when :start_element
        # Attribute values are unnormalized in place before delivery.
        attributes = event[2]
        attributes.each do |attr_name, raw|
          attributes[attr_name] = @parser.unnormalize( raw )
        end
        @listener.tag_start( event[1], attributes )
      when :end_element
        @listener.tag_end( event[1] )
      when :text
        @listener.text( @parser.unnormalize( event[1] ) )
      when :processing_instruction
        @listener.instruction( *event[1,2] )
      when :start_doctype
        @listener.doctype( *event[1..-1] )
      when :end_doctype
        # FIXME: remove this condition for milestone:3.2
        @listener.doctype_end if @listener.respond_to? :doctype_end
      when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
        # Content is splatted into separate arguments.
        @listener.send( kind.to_s, *event[1..-1] )
      when :entitydecl, :notationdecl
        # Content is passed as one Array argument.
        @listener.send( kind.to_s, event[1..-1] )
      end
    end
  end
end
end
end

View file

@ -0,0 +1,100 @@
require 'rexml/validation/validationexception'
require 'rexml/undefinednamespaceexception'
module REXML
module Parsers
# Builds a node tree from the events of a BaseParser by adding nodes to a
# build context (a Document by default).
class TreeParser
# source::        whatever BaseParser.new accepts
# build_context:: the node that receives the parsed content
def initialize( source, build_context = Document.new )
@build_context = build_context
@parser = Parsers::BaseParser.new( source )
end
# Hands +listener+ to the underlying parser's own add_listener.
def add_listener( listener )
@parser.add_listener( listener )
end
# Pulls events until :end_document, adding a node to the current build
# context for each one.  ValidationException and
# UndefinedNamespaceException are re-raised untouched; any other error is
# wrapped in a ParseException that carries the source and the parser.
def parse
tag_stack = []
in_doctype = false
entities = nil
begin
while true
event = @parser.pull
#STDERR.puts "TREEPARSER GOT #{event.inspect}"
case event[0]
when :end_document
# Elements still open at the end of input mean a close tag is missing.
unless tag_stack.empty?
#raise ParseException.new("No close tag for #{tag_stack.inspect}")
raise ParseException.new("No close tag for #{@build_context.xpath}")
end
return
when :start_element
tag_stack.push(event[1])
# The new element becomes the build context until its end tag.
el = @build_context = @build_context.add_element( event[1] )
event[2].each do |key, value|
# NOTE(review): the third argument here is this parser, not the
# element -- confirm against Attribute.new's signature.
el.attributes[key]=Attribute.new(key,value,self)
end
when :end_element
tag_stack.pop
@build_context = @build_context.parent
when :text
# Text seen while inside the DOCTYPE is dropped.
if not in_doctype
if @build_context[-1].instance_of? Text
# Adjacent text is merged into the preceding Text node.
@build_context[-1] << event[1]
else
@build_context.add(
Text.new(event[1], @build_context.whitespace, nil, true)
) unless (
@build_context.ignore_whitespace_nodes and
event[1].strip.size==0
)
end
end
when :comment
c = Comment.new( event[1] )
@build_context.add( c )
when :cdata
c = CData.new( event[1] )
@build_context.add( c )
when :processing_instruction
@build_context.add( Instruction.new( event[1], event[2] ) )
when :end_doctype
in_doctype = false
# Replace each collected declaration with the value reported by the
# DocType's own entity table.
entities.each { |k,v| entities[k] = @build_context.entities[k].value }
@build_context = @build_context.parent
when :start_doctype
doctype = DocType.new( event[1..-1], @build_context )
@build_context = doctype
entities = {}
in_doctype = true
when :attlistdecl
n = AttlistDecl.new( event[1..-1] )
@build_context.add( n )
when :externalentity
n = ExternalEntity.new( event[1] )
@build_context.add( n )
when :elementdecl
n = ElementDecl.new( event[1] )
@build_context.add(n)
when :entitydecl
# External (PUBLIC/SYSTEM) entities are not collected.
entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/
@build_context.add(Entity.new(event))
when :notationdecl
n = NotationDecl.new( *event[1..-1] )
@build_context.add( n )
when :xmldecl
x = XMLDecl.new( event[1], event[2], event[3] )
@build_context.add( x )
end
end
rescue REXML::Validation::ValidationException
raise
rescue REXML::UndefinedNamespaceException
raise
rescue
raise ParseException.new( $!.message, @parser.source, @parser, $! )
end
end
end
end
end

View file

@ -0,0 +1,56 @@
require 'rexml/parsers/streamparser'
require 'rexml/parsers/baseparser'
module REXML
module Parsers
# Builds a tree of nested plain Arrays from the events of a BaseParser,
# without creating any node objects.
class UltraLightParser
  # stream:: the input; it is kept so that +rewind+ can re-feed it.
  def initialize stream
    @stream = stream
    @parser = REXML::Parsers::BaseParser.new( stream )
  end

  # Hands +listener+ to the underlying parser's own add_listener.
  def add_listener( listener )
    @parser.add_listener( listener )
  end

  # Rewinds the input stream and gives it back to the parser.
  def rewind
    @stream.rewind
    @parser.stream = @stream
  end

  # Parses the whole input and returns the root Array.
  #
  # :start_element and :start_doctype events open a new level: the event
  # array is appended to the current context, the context is inserted
  # into the event at index 1 as its parent link, and the event becomes
  # the current context.  :end_element and :end_doctype follow that link
  # back up.  Every other event is appended unchanged.
  def parse
    root = context = []
    while true
      event = @parser.pull
      case event[0]
      when :end_document
        break
      when :end_doctype
        context = context[1]
      when :start_element, :start_doctype
        # :start_doctype is the event that opens a DOCTYPE.  Without it
        # here, the matching :end_doctype would follow a parent link that
        # was never created.
        context << event
        event[1,0] = [context]
        context = event
      when :end_element
        context = context[1]
      else
        context << event
      end
    end
    root
  end
end
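# Layout of the tree returned by UltraLightParser#parse.
# The root is a plain Array of nodes.
# An element is the parser's :start_element event array with its parent
# inserted at index 1. The array contains:
# 0 The symbol :start_element
# 1 The parent (an element array, or the root array)
# 2 The tag name
# 3 A hash of attributes
# 4..-1 The child nodes
# Every other node is the event array exactly as the parser produced it:
# Text is [ :text, ... ]
# PIs are [ :processing_instruction, target, data ]
# Comments are [ :comment, data ]
# DocType and XMLDecl events are likewise stored as plain event arrays.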
end
end

View file

@ -0,0 +1,698 @@
require 'rexml/namespace'
require 'rexml/xmltokens'
module REXML
module Parsers
# You don't want to use this class. Really. Use XPath, which is a wrapper
# for this class. Believe me. You don't want to poke around in here.
# There is strange, dark magic at work in this code. Beware. Go back! Go
# back while you still can!
class XPathParser
include XMLTokens
LITERAL = /^'([^']*)'|^"([^"]*)"/u
# Installs +namespaces+ as the namespace context of Functions and
# remembers it on this parser.
def namespaces=( namespaces )
  @namespaces = (Functions::namespace_context = namespaces)
end
# Parses the XPath expression +path+ and returns the parsed form as a
# nested Array.  The caller's string is left untouched (so frozen strings
# are accepted); whitespace just inside brackets and parentheses is
# ignorable and is stripped from a private copy first.
def parse path
  compact = path.gsub( /([\(\[])\s+/, '\1' ).gsub( /\s+([\]\)])/, '\1' )
  parsed = []
  OrExpr( compact, parsed )
  parsed
end
# Parses +path+ as the body of a single predicate (it is wrapped in
# square brackets first) and returns the parsed form.
def predicate path
  result = []
  bracketed = "[#{path}]"
  Predicate( bracketed, result )
  result
end
# Renders a path in abbreviated XPath syntax and returns the String.
# +path+ may be an XPath String (it is parsed first) or an already parsed
# Array.  NOTE: a parsed Array is consumed destructively with +shift+, so
# the caller's array is empty afterwards.
def abbreviate( path )
path = path.kind_of?(String) ? parse( path ) : path
string = ""
document = false
while path.size > 0
op = path.shift
case op
when :node
# node() is left implicit in the abbreviated form
when :attribute
string << "/" if string.size > 0
string << "@"
when :child
# the child axis is the default; only the separator is written
string << "/" if string.size > 0
when :descendant_or_self
string << "/"
when :self
string << "."
when :parent
string << ".."
when :any
string << "*"
when :text
string << "text()"
when :following, :following_sibling,
:ancestor, :ancestor_or_self, :descendant,
:namespace, :preceding, :preceding_sibling
# axes without an abbreviation are spelled out as "axis-name::"
string << "/" unless string.size == 0
string << op.to_s.tr("_", "-")
string << "::"
when :qname
# the next two items are the prefix and the local name
prefix = path.shift
name = path.shift
string << prefix+":" if prefix.size > 0
string << name
when :predicate
# the next item is the parsed predicate expression
string << '['
string << predicate_to_string( path.shift ) {|x| abbreviate( x ) }
string << ']'
when :document
# remembered so the leading "/" can be added at the end
document = true
when :function
# the next two items are the function name and its argument list;
# only the first argument is rendered
string << path.shift
string << "( "
string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
string << " )"
when :literal
string << %Q{ "#{path.shift}" }
else
string << "/" unless string.size == 0
string << "UNKNOWN("
string << op.inspect
string << ")"
end
end
string = "/"+string if document
return string
end
# Renders a path in unabbreviated XPath syntax ("axis::test") and returns
# the String.  +path+ may be an XPath String (it is parsed first) or an
# already parsed Array.  NOTE: a parsed Array is consumed destructively
# with +shift+.
def expand( path )
path = path.kind_of?(String) ? parse( path ) : path
string = ""
document = false
while path.size > 0
op = path.shift
case op
when :node
string << "node()"
when :attribute, :child, :following, :following_sibling,
:ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
:namespace, :preceding, :preceding_sibling, :self, :parent
# every axis is written out in full, separated from what precedes it
string << "/" unless string.size == 0
string << op.to_s.tr("_", "-")
string << "::"
when :any
string << "*"
when :qname
# the next two items are the prefix and the local name
prefix = path.shift
name = path.shift
string << prefix+":" if prefix.size > 0
string << name
when :predicate
# the next item is the parsed predicate expression
string << '['
string << predicate_to_string( path.shift ) { |x| expand(x) }
string << ']'
when :document
# remembered so the leading "/" can be added at the end
document = true
else
# anything else (including :function and :literal, which have no
# branch here) is rendered as UNKNOWN(...)
string << "/" unless string.size == 0
string << "UNKNOWN("
string << op.inspect
string << ")"
end
end
string = "/"+string if document
return string
end
# Renders one parsed predicate expression as a String with runs of spaces
# squeezed to one.  Binary operators, function calls and literals are
# handled here; anything else is passed to the block, which must return
# the String for a parsed path.  +path+ is consumed destructively.
def predicate_to_string( path, &block )
string = ""
case path[0]
when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
# [ operator, left, right ]
op = path.shift
# comparison and union operators are mapped to their symbols; the
# remaining ones are written with their own name (see op.to_s below)
case op
when :eq
op = "="
when :lt
op = "<"
when :gt
op = ">"
when :lteq
op = "<="
when :gteq
op = ">="
when :neq
op = "!="
when :union
op = "|"
end
left = predicate_to_string( path.shift, &block )
right = predicate_to_string( path.shift, &block )
string << " "
string << left
string << " "
string << op.to_s
string << " "
string << right
string << " "
when :function
# [ :function, name, arguments ]
path.shift
name = path.shift
string << name
string << "( "
string << predicate_to_string( path.shift, &block )
string << " )"
when :literal
# [ :literal, value ]; the value is written in its inspect form
path.shift
string << " "
string << path.shift.inspect
string << " "
else
string << " "
string << yield( path )
string << " "
end
return string.squeeze(" ")
end
private
#LocationPath
#  | RelativeLocationPath
#  | '/' RelativeLocationPath?
#  | '//' RelativeLocationPath
#
# A leading "/" adds :document to +parsed+; a leading "//" adds
# :document, :descendant_or_self, :node.  Whatever follows is handed to
# RelativeLocationPath.  Returns nil when nothing follows.
def LocationPath path, parsed
  trimmed = path.strip
  if trimmed[0] == ?/
    parsed << :document
    double_slash = (trimmed[1] == ?/)
    parsed.push( :descendant_or_self, :node ) if double_slash
    trimmed = trimmed[(double_slash ? 2 : 1)..-1]
  end
  trimmed.size > 0 ? RelativeLocationPath( trimmed, parsed ) : nil
end
#RelativeLocationPath
# | Step
# | (AXIS_NAME '::' | '@' | '') AxisSpecifier
# NodeTest
# Predicate
# | '.' | '..' AbbreviatedStep
# | RelativeLocationPath '/' Step
# | RelativeLocationPath '//' Step
# Matches an explicit "axis-name::" prefix at the start of a step.
AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
# Parses a sequence of steps separated by "/" or "//", appending the
# parsed form to +parsed+.  Returns the unparsed remainder of +path+.
def RelativeLocationPath path, parsed
#puts "RelativeLocationPath #{path}"
while path.size > 0
# (axis or @ or <child::>) nodetest predicate >
# OR > / Step
# (. or ..) >
if path[0] == ?.
# abbreviated step: ".." is parent::node(), "." is self::node()
if path[1] == ?.
parsed << :parent
parsed << :node
path = path[2..-1]
else
parsed << :self
parsed << :node
path = path[1..-1]
end
else
if path[0] == ?@
#puts "ATTRIBUTE"
parsed << :attribute
path = path[1..-1]
# Goto Nodetest
elsif path =~ AXIS
# "ancestor-or-self" becomes :ancestor_or_self, and so on
parsed << $1.tr('-','_').intern
path = $'
# Goto Nodetest
else
# no axis given: child is the default
parsed << :child
end
#puts "NODETESTING '#{path}'"
# the node test and its predicates are collected separately and then
# appended after the axis
n = []
path = NodeTest( path, n)
#puts "NODETEST RETURNED '#{path}'"
if path[0] == ?[
path = Predicate( path, n )
end
parsed.concat(n)
end
if path.size > 0
if path[0] == ?/
if path[1] == ?/
# "//" is short for /descendant-or-self::node()/
parsed << :descendant_or_self
parsed << :node
path = path[2..-1]
else
path = path[1..-1]
end
else
# anything other than a step separator ends the location path
return path
end
end
end
return path
end
# Returns a 1-1 map of the nodeset
# The contents of the resulting array are either:
# true/false, if a positive match
# String, if a name match
#NodeTest
# | ('*' | NCNAME ':' '*' | QNAME) NameTest
# | NODE_TYPE '(' ')' NodeType
# | PI '(' LITERAL ')' PI
# | '[' expr ']' Predicate
NCNAMETEST= /^(#{NCNAME_STR}):\*/u
QNAME = Namespace::NAMESPLIT
NODE_TYPE = /^(comment|text|node)\(\s*\)/m
PI = /^processing-instruction\(/
# Parses one node test at the start of +path+, appends its parsed form to
# +parsed+ and returns the rest of +path+:
#
#   *                               -> :any
#   comment() | text() | node()     -> the type as a Symbol
#   processing-instruction()        -> :processing_instruction, ''
#   processing-instruction('name')  -> :processing_instruction, 'name'
#   prefix:*                        -> :namespace, prefix
#   prefix:name | name              -> :qname, prefix (or ''), name
#
# When nothing matches, +path+ is returned unchanged and +parsed+ is left
# alone.  Raises ParseException for a malformed processing-instruction
# test.
def NodeTest path, parsed
  #puts "NodeTest with #{path}"
  case path
  when /^\*/
    path = $'
    parsed << :any
  when NODE_TYPE
    type = $1
    path = $'
    parsed << type.tr('-', '_').intern
  when PI
    path = $'
    literal = nil
    if path =~ /^\s*\)/
      # Empty argument list: consume the closing parenthesis so that any
      # following steps are still parsed.
      path = $'
    else
      unless path =~ LITERAL
        raise ParseException.new("Missing literal or ')' in processing instruction")
      end
      # LITERAL captures single-quoted text in $1, double-quoted in $2.
      literal = $1 || $2
      path = $'
      raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?)
      path = path[1..-1]
    end
    parsed << :processing_instruction
    parsed << (literal || '')
  when NCNAMETEST
    #puts "NCNAMETEST"
    prefix = $1
    path = $'
    parsed << :namespace
    parsed << prefix
  when QNAME
    #puts "QNAME"
    prefix = $1
    name = $2
    path = $'
    prefix = "" unless prefix
    parsed << :qname
    parsed << prefix
    parsed << name
  end
  return path
end
# Parses the run of "[...]" predicates at the start of +path+.  For each
# one, :predicate followed by its parsed expression is appended to
# +parsed+.  Returns the rest of +path+, or nil when +path+ does not start
# with "[".
def Predicate path, parsed
  return nil unless path[0] == ?[
  bodies = []
  until path[0] != ?[
    path, group = get_group(path)
    # drop the surrounding brackets
    bodies << group[1..-2] if group
  end
  bodies.each do |body|
    subtree = []
    parsed.push( :predicate, subtree )
    OrExpr( body, subtree )
  end
  path
end
# The expression methods below all share one shape: parse the left
# operand with the next-higher-precedence rule, fold each further
# "operator operand" into a left-nested [ op, left, right ] tree, store
# the tree in +parsed+ (replacing it when empty, appending otherwise) and
# return the unparsed remainder.
#| OrExpr S 'or' S AndExpr
#| AndExpr
def OrExpr path, parsed
  tree = []
  remainder = AndExpr( path, tree )
  unless remainder == path
    while remainder =~ /^\s*( or )/
      rhs = []
      tree = [ :or, tree, rhs ]
      remainder = AndExpr( $', rhs )
    end
  end
  unless tree.empty?
    if parsed.empty?
      parsed.replace( tree )
    else
      parsed << tree
    end
  end
  remainder
end
#| AndExpr S 'and' S EqualityExpr
#| EqualityExpr
#
# Folds "a and b and c" into [ :and, [ :and, a, b ], c ].  Returns the
# unparsed remainder.
def AndExpr path, parsed
  tree = []
  remainder = EqualityExpr( path, tree )
  unless remainder == path
    while remainder =~ /^\s*( and )/
      rhs = []
      tree = [ :and, tree, rhs ]
      remainder = EqualityExpr( $', rhs )
    end
  end
  unless tree.empty?
    if parsed.empty?
      parsed.replace( tree )
    else
      parsed << tree
    end
  end
  remainder
end
#| EqualityExpr ('=' | '!=') RelationalExpr
#| RelationalExpr
#
# Folds "=" into :eq nodes and "!=" into :neq nodes.  Returns the
# unparsed remainder.
def EqualityExpr path, parsed
  tree = []
  remainder = RelationalExpr( path, tree )
  unless remainder == path
    while remainder =~ /^\s*(!?=)\s*/
      operator = ($1[0] == ?!) ? :neq : :eq
      rhs = []
      tree = [ operator, tree, rhs ]
      remainder = RelationalExpr( $', rhs )
    end
  end
  unless tree.empty?
    if parsed.empty?
      parsed.replace( tree )
    else
      parsed << tree
    end
  end
  remainder
end
#| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
#| AdditiveExpr
#
# Folds the comparison operators into :lt, :gt, :lteq and :gteq nodes.
# Returns the unparsed remainder.
def RelationalExpr path, parsed
  tree = []
  remainder = AdditiveExpr( path, tree )
  unless remainder == path
    while remainder =~ /^\s*([<>]=?)\s*/
      operator = $1
      after = $'
      base = (operator[0, 1] == "<") ? "lt" : "gt"
      suffix = (operator[-1, 1] == "=") ? "eq" : ""
      rhs = []
      tree = [ (base + suffix).intern, tree, rhs ]
      remainder = AdditiveExpr( after, rhs )
    end
  end
  unless tree.empty?
    if parsed.empty?
      parsed.replace( tree )
    else
      parsed << tree
    end
  end
  remainder
end
#| AdditiveExpr ('+' | S '-') MultiplicativeExpr
#| MultiplicativeExpr
#
# Folds "+" into :plus nodes and " -" (a minus preceded by a space) into
# :minus nodes.  Returns the unparsed remainder.
def AdditiveExpr path, parsed
  tree = []
  remainder = MultiplicativeExpr( path, tree )
  unless remainder == path
    while remainder =~ /^\s*(\+| -)\s*/
      operator = ($1[0] == ?+) ? :plus : :minus
      rhs = []
      tree = [ operator, tree, rhs ]
      remainder = MultiplicativeExpr( $', rhs )
    end
  end
  unless tree.empty?
    if parsed.empty?
      parsed.replace( tree )
    else
      parsed << tree
    end
  end
  remainder
end
#| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
#| UnaryExpr
#
# Folds "*", " div " and " mod " into :mult, :div and :mod nodes.
# Returns the unparsed remainder.
def MultiplicativeExpr path, parsed
  tree = []
  remainder = UnaryExpr( path, tree )
  unless remainder == path
    while remainder =~ /^\s*(\*| div | mod )\s*/
      token = $1
      after = $'
      operator =
        if token[0] == ?*
          :mult
        elsif token.include?( "div" )
          :div
        else
          :mod
        end
      rhs = []
      tree = [ operator, tree, rhs ]
      remainder = UnaryExpr( after, rhs )
    end
  end
  unless tree.empty?
    if parsed.empty?
      parsed.replace( tree )
    else
      parsed << tree
    end
  end
  remainder
end
#| '-' UnaryExpr
#| UnionExpr
#
# Strips the leading minus signs; an odd number of them adds :neg to
# +parsed+.  The operand is parsed by UnionExpr and appended.  Returns
# the unparsed remainder.
def UnaryExpr path, parsed
  minus_signs = path[/^(\-*)/, 1]
  operand_text = path[minus_signs.size..-1]
  parsed << :neg if minus_signs.size % 2 == 1
  operand = []
  remainder = UnionExpr( operand_text, operand )
  parsed.concat( operand )
  remainder
end
#| UnionExpr '|' PathExpr
#| PathExpr
#
# Folds "a | b | c" into [ :union, [ :union, a, b ], c ].  Returns the
# unparsed remainder.
def UnionExpr path, parsed
  tree = []
  remainder = PathExpr( path, tree )
  unless remainder == path
    while remainder =~ /^\s*(\|)\s*/
      rhs = []
      tree = [ :union, tree, rhs ]
      remainder = PathExpr( $', rhs )
    end
  end
  unless tree.empty?
    if parsed.empty?
      parsed.replace( tree )
    else
      parsed << tree
    end
  end
  remainder
end
#| LocationPath
#| FilterExpr ('/' | '//') RelativeLocationPath
#
# Parses a path expression at the start of +path+ (leading whitespace is
# skipped), appends the parsed form to +parsed+ and returns the unparsed
# remainder.
def PathExpr path, parsed
  path =~ /^\s*/
  path = $'
  #puts "PATH >>> #{path}"
  n = []
  rest = FilterExpr( path, n )
  #puts "PATH <<< '#{rest}'"
  if rest != path
    if rest and rest[0] == ?/
      # A filter expression followed by further steps.  The combined
      # result must reach the caller; returning here without the concat
      # would silently discard the whole expression.
      rest = RelativeLocationPath(rest, n)
      parsed.concat(n)
      return rest
    end
  end
  #puts "BEFORE WITH '#{rest}'"
  rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w_*]/
  parsed.concat(n)
  return rest
end
#| FilterExpr Predicate
#| PrimaryExpr
#
# Parses a primary expression and any predicates that directly follow
# it, appends the result to +parsed+ and returns the unparsed remainder.
def FilterExpr path, parsed
  collected = []
  remainder = PrimaryExpr( path, collected )
  if remainder && remainder[0] == ?[
    remainder = Predicate( remainder, collected )
  end
  parsed.concat( collected )
  remainder
end
#| VARIABLE_REFERENCE
#| '(' expr ')'
#| LITERAL
#| NUMBER
#| FunctionCall
VARIABLE_REFERENCE = /^\$(#{NAME_STR})/u
# An integer or decimal number at the start of the text.
NUMBER = /^(\d*\.?\d+)/
# The node-type names.  "name(" with one of these names is a node test,
# not a function call.  The alternation is grouped so that the anchors
# apply to every alternative; otherwise any function whose name merely
# contains "text" (or starts with "comment", or ends with "node") would be
# mistaken for a node test.
NT = /^(?:comment|text|processing-instruction|node)$/
# Parses a primary expression at the start of +path+, appends its parsed
# form to +parsed+ and returns the unparsed remainder:
#
#   $name      -> :variable, name
#   name(...)  -> :function, name, arguments  (unless name is a node type,
#                 in which case +path+ is returned untouched)
#   number     -> :literal, Integer or Float
#   'text'     -> :literal, String
#   ( expr )   -> the parsed expression
#
# When nothing matches, +path+ is returned unchanged.
def PrimaryExpr path, parsed
  case path
  when VARIABLE_REFERENCE
    varname = $1
    path = $'
    parsed << :variable
    parsed << varname
    #arry << @variables[ varname ]
  when /^(\w[-\w]*)(?:\()/
    #puts "PrimaryExpr :: Function >>> #$1 -- '#$''"
    fname = $1
    tmp = $'
    #puts "#{fname} =~ #{NT.inspect}"
    # Node tests such as text() are left for NodeTest to handle.
    return path if fname =~ NT
    path = tmp
    parsed << :function
    parsed << fname
    path = FunctionCall(path, parsed)
  when NUMBER
    #puts "LITERAL or NUMBER: #$1"
    number = $1
    path = $'
    parsed << :literal
    parsed << (number.include?('.') ? number.to_f : number.to_i)
  when LITERAL
    #puts "LITERAL or NUMBER: #$1"
    # single-quoted text is captured in $1, double-quoted in $2
    varname = $1.nil? ? $2 : $1
    path = $'
    parsed << :literal
    parsed << varname
  when /^\(/                                               #/
    path, contents = get_group(path)
    # drop the surrounding parentheses
    contents = contents[1..-2]
    n = []
    OrExpr( contents, n )
    parsed.concat(n)
  end
  path
end
#| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
#
# +rest+ is the text following the opening parenthesis.  Each argument is
# parsed as an expression; the list of parsed arguments is appended to
# +parsed+ as one item.  Returns the text after the closing parenthesis.
def FunctionCall rest, parsed
  path, arguments = parse_args(rest)
  argset = arguments.collect do |argument|
    tree = []
    OrExpr( argument, tree )
    tree
  end
  parsed << argset
  path
end
# get_group( '[foo]bar' ) -> ['bar', '[foo]']
#
# Splits off the balanced bracket group at the start of +string+.  The
# opening character is the first character of the string; the closing
# one is ")" for "(" and "]" otherwise.  Returns [ remainder, group ], or
# nil when the group is never closed.
def get_group string
  opener = string[0,1]
  closer = (opener == "(") ? ")" : "]"
  depth = 0
  pos = 0
  loop do
    ch = string[pos,1]
    if ch == opener
      depth += 1
    elsif ch == closer
      depth -= 1
    end
    pos += 1
    break unless depth > 0 && pos < string.length
  end
  depth == 0 ? [string[pos..-1], string[0, pos]] : nil
end
# Splits a function argument list.  +string+ is the text following the
# opening parenthesis of the call.  Returns [ remainder, arguments ],
# where arguments are the stripped, non-empty argument strings and
# remainder is the text after the matching closing parenthesis; returns
# nil when that parenthesis is never found.  Commas and parentheses inside
# quotes, and commas inside nested parentheses, do not split.
def parse_args( string )
arguments = []
ind = 0
inquot = false
inapos = false
# the caller has already consumed the opening parenthesis
depth = 1
begin
case string[ind]
when ?"
inquot = !inquot unless inapos
when ?'
inapos = !inapos unless inquot
else
unless inquot or inapos
case string[ind]
when ?(
depth += 1
# NOTE(review): depth starts at 1 and has just been incremented, so
# this branch looks unreachable -- confirm before relying on it.
if depth == 1
string = string[1..-1]
ind -= 1
end
when ?)
depth -= 1
if depth == 0
# end of the argument list: emit the last argument
s = string[0,ind].strip
arguments << s unless s == ""
string = string[ind+1..-1]
end
when ?,
if depth == 1
# top-level comma: emit the argument and restart the scan on what is left
s = string[0,ind].strip
arguments << s unless s == ""
string = string[ind+1..-1]
# becomes 0 after the increment below
ind = -1
end
end
end
end
ind += 1
end while depth > 0 and ind < string.length
return nil unless depth==0
[string,arguments]
end
end
end
end

View file

@ -0,0 +1,266 @@
require 'rexml/functions'
require 'rexml/xmltokens'
module REXML
class QuickPath
include Functions
include XMLTokens
EMPTY_HASH = {}
# Returns the first node matched by +path+ relative to +element+, or nil
# when nothing matches.
def QuickPath::first element, path, namespaces=EMPTY_HASH
  found = match(element, path, namespaces)
  found[0]
end
# Passes every node matched by +path+ to the block.  A nil path means
# "*".
def QuickPath::each element, path, namespaces=EMPTY_HASH, &block
  path ||= "*"
  found = match(element, path, namespaces)
  found.each( &block )
end
# Returns the nodes matched by +path+ relative to +element+.  The start
# of the path decides which nodes the filtering begins with.  Installs
# +namespaces+ as the namespace context of Functions.  Raises a
# RuntimeError for a nil path.
def QuickPath::match element, path, namespaces=EMPTY_HASH
  raise "nil is not a valid xpath" unless path
  Functions::namespace_context = namespaces
  case path
  when /^\/([^\/]|$)/u
    # a single leading slash: match on root
    below_root = path[1..-1]
    return [element.root.parent] if below_root == ''
    filter([element.root], below_root)
  when /^[-\w]*::/u
    # an explicit axis applies to the element itself
    filter([element], path)
  when /^\*/u, /^[\[!\w:]/u
    # a wildcard, name or predicate: match on children
    filter(element.to_a, path)
  else
    filter([element], path)
  end
end
# Given an array of nodes it filters the array based on the path. The
# result is that when this method returns, the array will contain elements
# which match the path
#
# The first construct of +path+ is recognised by the +case+ below, applied
# to +elements+, and the rest of the path is processed recursively.  The
# order of the branches matters because several patterns overlap.  Returns
# [] when no branch recognises the path.  NOTE: the element-name branch
# removes non-matching nodes from the +elements+ array it was given.
def QuickPath::filter elements, path
return elements if path.nil? or path == '' or elements.size == 0
case path
when /^\/\//u # Descendant
return axe( elements, "descendant-or-self", $' )
when /^\/?\b(\w[-\w]*)\b::/u # Axe
# NOTE(review): axe_name and rest are assigned but not used; the call
# below reads $1 and $' directly.
axe_name = $1
rest = $'
return axe( elements, $1, $' )
when /^\/(?=\b([:!\w][-\.\w]*:)?[-!\*\.\w]*\b([^:(]|$)|\*)/u # Child
# step down to the children of every node and keep filtering
rest = $'
results = []
elements.each do |element|
results |= filter( element.to_a, rest )
end
return results
when /^\/?(\w[-\w]*)\(/u # / Function
return function( elements, $1, $' )
when Namespace::NAMESPLIT # Element name
name = $2
ns = $1
rest = $'
# keep Elements whose expanded name equals the name, or whose local name
# equals it and whose namespace is the one mapped to the prefix
elements.delete_if do |element|
!(element.kind_of? Element and
(element.expanded_name == name or
(element.name == name and
element.namespace == Functions.namespace_context[ns])))
end
return filter( elements, rest )
when /^\/\[/u
# a predicate applied to the children of every Element
matches = []
elements.each do |element|
matches |= predicate( element.to_a, path[1..-1] ) if element.kind_of? Element
end
return matches
when /^\[/u # Predicate
return predicate( elements, path )
when /^\/?\.\.\./u # Ancestor
return axe( elements, "ancestor", $' )
when /^\/?\.\./u # Parent
return filter( elements.collect{|e|e.parent}, $' )
when /^\/?\./u # Self
return filter( elements, $' )
when /^\*/u # Any
results = []
elements.each do |element|
results |= filter( [element], $' ) if element.kind_of? Element
#if element.kind_of? Element
# children = element.to_a
# children.delete_if { |child| !child.kind_of?(Element) }
# results |= filter( children, $' )
#end
end
return results
end
return []
end
# Applies the axis +axe_name+ to +elements+ and filters the nodes it
# selects with +rest+, the remainder of the path.  Returns the matches
# without duplicates; an unrecognised axis selects nothing.
#
# "preceding-sibling" is the XPath name of the axis this code has always
# called "previous-sibling"; both spellings are accepted.
def QuickPath::axe( elements, axe_name, rest )
  matches = []
  # the "-or-self" variants also test the context nodes themselves
  matches = filter( elements.dup, rest ) if axe_name =~ /-or-self$/u
  case axe_name
  when /^descendant/u
    elements.each do |element|
      matches |= filter( element.to_a, "descendant-or-self::#{rest}" ) if element.kind_of? Element
    end
  when /^ancestor/u
    elements.each do |element|
      # walk up to the top of the tree, collecting every ancestor
      while element.parent
        matches << element.parent
        element = element.parent
      end
    end
    matches = filter( matches, rest )
  when "self"
    matches = filter( elements, rest )
  when "child"
    elements.each do |element|
      matches |= filter( element.to_a, rest ) if element.kind_of? Element
    end
  when "attribute"
    # here +rest+ is the attribute name
    elements.each do |element|
      matches << element.attributes[ rest ] if element.kind_of? Element
    end
  when "parent"
    matches = filter(elements.collect{|element| element.parent}.uniq, rest)
  when "following-sibling"
    matches = filter(elements.collect{|element| element.next_sibling}.uniq,
      rest)
  when "previous-sibling", "preceding-sibling"
    matches = filter(elements.collect{|element|
      element.previous_sibling}.uniq, rest )
  end
  return matches.uniq
end
# A predicate filters a node-set with respect to an axis to produce a
# new node-set. For each node in the node-set to be filtered, the
# PredicateExpr is evaluated with that node as the context node, with
# the number of nodes in the node-set as the context size, and with the
# proximity position of the node in the node-set with respect to the
# axis as the context position; if PredicateExpr evaluates to true for
# that node, the node is included in the new node-set; otherwise, it is
# not included.
#
# A PredicateExpr is evaluated by evaluating the Expr and converting
# the result to a boolean. If the result is a number, the result will
# be converted to true if the number is equal to the context position
# and will be converted to false otherwise; if the result is not a
# number, then the result will be converted as if by a call to the
# boolean function. Thus a location path para[3] is equivalent to
# para[position()=3].
#
# +path+ starts with the "[" of the predicate.  The predicate text is
# rewritten into a Ruby expression and evaluated once per element; the
# elements that pass are then filtered with the rest of the path.
# Raises a RuntimeError when the "[" is never closed.
#
# SECURITY: the rewritten predicate is passed to +eval+.  Never hand
# untrusted path strings to QuickPath.
def QuickPath::predicate( elements, path )
  # find the "]" that closes the opening "["
  ind = 1
  bcount = 1
  while bcount > 0
    # without this check an unbalanced predicate would loop forever
    raise "Unbalanced '[' in predicate: #{path}" if ind >= path.length
    bcount += 1 if path[ind] == ?[
    bcount -= 1 if path[ind] == ?]
    ind += 1
  end
  ind -= 1
  predicate = path[1..ind-1]
  rest = path[ind+1..-1]

  # have to change 'a [=<>] b [=<>] c' into 'a [=<>] b and b [=<>] c'
  predicate.gsub!( /([^\s(and)(or)<>=]+)\s*([<>=])\s*([^\s(and)(or)<>=]+)\s*([<>=])\s*([^\s(and)(or)<>=]+)/u ) {
    "#$1 #$2 #$3 and #$3 #$4 #$5"
  }
  # Let's do some Ruby trickery to avoid some work:
  predicate.gsub!( /&/u, "&&" )
  predicate.gsub!( /=/u, "==" )
  predicate.gsub!( /@(\w[-\w.]*)/u ) {
    "attribute(\"#$1\")"
  }
  predicate.gsub!( /\bmod\b/u, "%" )
  # function names become method names: "-" is not legal in those
  predicate.gsub!( /\b(\w[-\w.]*\()/u ) {
    fname = $1
    fname.gsub( /-/u, "_" )
  }

  # [ context position, context size ]
  Functions.pair = [ 0, elements.size ]
  results = []
  elements.each do |element|
    Functions.pair[0] += 1
    Functions.node = element
    res = eval( predicate )
    case res
    when true
      results << element
    when Integer
      # a number selects the element at that position
      results << element if Functions.pair[0] == res
    when String
      results << element
    end
  end
  return filter( results, rest )
end
# The attribute +name+ of the current context node, or nil when that node
# is not an Element.
def QuickPath::attribute( name )
  return nil unless Functions.node.kind_of? Element
  Functions.node.attributes[name]
end
# The name of the current context node, or nil when that node is not an
# Element.
def QuickPath::name()
  return nil unless Functions.node.kind_of? Element
  Functions.node.name
end
# Forwards any unknown class-level call to Functions, so that function
# names appearing in an evaluated predicate resolve there.
#
# id:: the missing method's name (a Symbol)
# args:: the arguments it was called with
#
# Raises RuntimeError, naming the call and carrying the original
# message, when the forwarded call fails.  Only StandardError is
# wrapped: interrupts, exit requests and similar non-standard
# exceptions now propagate unchanged instead of being converted.
def QuickPath::method_missing( id, *args )
  begin
    Functions.send( id.id2name, *args )
  rescue StandardError
    raise "METHOD: #{id.id2name}(#{args.join ', '})\n#{$!.message}"
  end
end
# Applies the function +fname+ from Functions to every element and
# returns the elements it selects.
#
# elements:: the candidate nodes
# fname:: name of the method to send to Functions
# rest:: remaining path text; parse_args reads the arguments from it
#
# For each element, Functions.node is set to the element and
# Functions.pair to [position, size].  An element is kept when the
# function returns true, or an Integer equal to the element's 1-based
# position.  Integer is used instead of Fixnum so the test keeps working
# on Rubies where Fixnum no longer exists.
def QuickPath::function( elements, fname, rest )
  args = parse_args( elements, rest )
  Functions.pair = [0, elements.size]
  results = []
  elements.each do |element|
    Functions.pair[0] += 1
    Functions.node = element
    res = Functions.send( fname, *args )
    case res
    when true
      results << element
    when Integer
      results << element if Functions.pair[0] == res
    end
  end
  return results
end
# Consumes an argument list from the front of +string+, one character at
# a time, and returns the collected arguments once a ")" is reached.
#
# element:: passed through to function() for nested calls
# string:: the text to parse; it is modified in place as characters are
#          consumed
#
# Returns the arguments array at ")"; returns "" if the input runs out
# first.
#
# NOTE(review): the buffer is not cleared after a ",", and text pending
# before ")" is never evaluated -- this looks unfinished; confirm the
# intended behaviour before relying on it.
def QuickPath::parse_args( element, string )
  # /.*?(?:\)|,)/
  arguments = []
  buffer = ""
  while string and string != ""
    c = string[0]
    # \A with the m flag removes the first character even when it is a
    # newline; the previous /^./ could not, which left the string
    # unchanged and the loop spinning forever.
    string.sub!(/\A./mu, "")
    case c
    when ?,
      # if depth = 1, then we start a new argument
      arguments << evaluate( buffer )
      #arguments << evaluate( string[0..count] )
    when ?(
      # start a new method call
      function( element, buffer, string )
      buffer = ""
    when ?)
      # close the method call and return arguments
      return arguments
    else
      buffer << c
    end
  end
  ""
end
end
end

31
vendor/plugins/rexml/lib/rexml/rexml.rb vendored Normal file
View file

@ -0,0 +1,31 @@
# -*- encoding: utf-8 -*-
# REXML is an XML toolkit for Ruby[http://www.ruby-lang.org], in Ruby.
#
# REXML is a _pure_ Ruby, XML 1.0 conforming,
# non-validating[http://www.w3.org/TR/2004/REC-xml-20040204/#sec-conformance]
# toolkit with an intuitive API. REXML passes 100% of the non-validating Oasis
# tests[http://www.oasis-open.org/committees/xml-conformance/xml-test-suite.shtml],
# and provides tree, stream, SAX2, pull, and lightweight APIs. REXML also
# includes a full XPath[http://www.w3c.org/tr/xpath] 1.0 implementation. Since
# Ruby 1.8, REXML is included in the standard Ruby distribution.
#
# Main page:: http://www.germane-software.com/software/rexml
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
# Version:: 3.1.7.2
# Date:: 2007/275
#
# This API documentation can be downloaded from the REXML home page, or can
# be accessed online[http://www.germane-software.com/software/rexml_doc]
#
# A tutorial is available in the REXML distribution in docs/tutorial.html,
# or can be accessed
# online[http://www.germane-software.com/software/rexml/docs/tutorial.html]
module REXML
  # Copyright notice for the library.
  COPYRIGHT = "Copyright © 2001-2007 Sean Russell <ser@germane-software.com>"
  # Release date, written as year/day-of-year.
  DATE = "2007/275"
  # Release version string.
  VERSION = "3.1.7.2"
  # Revision keyword with the "$Revision:" wrapper and "$" signs removed.
  REVISION = "$Revision: 1284 $".delete( "$" ).sub( "Revision:", "" ).strip

  # Mixed-case aliases of the constants above.
  Copyright = COPYRIGHT
  Version = VERSION
end

View file

@ -0,0 +1,97 @@
module REXML
  # A template for stream parser listeners.  Every callback here has an
  # empty body and returns nil.
  #
  # The declaration callbacks (attlistdecl, elementdecl, etc) are handled
  # trivially; REXML doesn't yet handle doctype entity declarations, so
  # you have to parse them out yourself.
  #
  # === Missing methods from SAX2
  #  ignorable_whitespace
  # === Methods extending SAX2
  # +WARNING+
  # These methods are certainly going to change, until DTDs are fully
  # supported.  Be aware of this.
  #  start_document
  #  end_document
  #  doctype
  #  elementdecl
  #  attlistdecl
  #  entitydecl
  #  notationdecl
  #  cdata
  #  xmldecl
  #  comment
  module SAX2Listener
    def start_document; end

    def end_document; end

    def start_prefix_mapping(prefix, uri); end

    def end_prefix_mapping(prefix); end

    def start_element(uri, localname, qname, attributes); end

    def end_element(uri, localname, qname); end

    def characters(text); end

    def processing_instruction(target, data); end

    # Handles a doctype declaration.  Any attributes of the doctype which
    # are not supplied will be nil.
    # EG, <!DOCTYPE me PUBLIC "foo" "bar">
    # @p name the name of the doctype; EG, "me"
    # @p pub_sys "PUBLIC", "SYSTEM", or nil.  EG, "PUBLIC"
    # @p long_name the supplied long name, or nil.  EG, "foo"
    # @p uri the uri of the doctype, or nil.  EG, "bar"
    def doctype(name, pub_sys, long_name, uri); end

    # If a doctype includes an ATTLIST declaration, it will cause this
    # method to be called.  The content is the declaration itself,
    # unparsed.  EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this
    # method as "el attr CDATA #REQUIRED".  This is the same for all of
    # the .*decl methods.
    def attlistdecl(element, pairs, contents); end

    # <!ELEMENT ...>
    def elementdecl(content); end

    # <!ENTITY ...>
    # The entity declaration arrives as an array.  It can be in a number
    # of formats, but in general it looks like (example, result):
    #  <!ENTITY % YN '"Yes"'>
    #  ["%", "YN", "'\"Yes\"'", "\""]
    #  <!ENTITY % YN 'Yes'>
    #  ["%", "YN", "'Yes'", "s"]
    #  <!ENTITY WhatHeSaid "He said %YN;">
    #  ["WhatHeSaid", "\"He said %YN;\"", "YN"]
    #  <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "SYSTEM", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
    #  <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
    #  <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
    #  ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
    # NOTE(review): the examples describe a single array while this
    # signature takes two parameters -- confirm how the array is split
    # between +name+ and +decl+.
    def entitydecl(name, decl); end

    # <!NOTATION ...>
    def notationdecl(content); end

    # Called when <![CDATA[ ... ]]> is encountered in a document.
    # @p content "..."
    def cdata(content); end

    # Called when an XML PI is encountered in the document.
    # EG: <?xml version="1.0" encoding="utf"?>
    # @p version the version attribute value.  EG, "1.0"
    # @p encoding the encoding attribute value, or nil.  EG, "utf"
    # @p standalone the standalone attribute value, or nil.  EG, nil
    def xmldecl(version, encoding, standalone); end

    # Called when a comment is encountered.
    # @p comment The content of the comment
    def comment(comment); end

    def progress(position); end
  end
end

258
vendor/plugins/rexml/lib/rexml/source.rb vendored Normal file
View file

@ -0,0 +1,258 @@
require 'rexml/encoding'
module REXML
# Generates Source-s.  USE THIS CLASS.
class SourceFactory
  # Wraps +arg+ in a Source.
  # @param arg an IO-like object (responds to read, readline, nil? and
  # eof?), something String-like (responds to to_str), or a Source
  # @return an IOSource for IO-like and String-like input (strings are
  # wrapped in a StringIO first), or +arg+ itself when it is already a
  # Source
  # @raise RuntimeError when +arg+ is none of the above
  def self.create_from(arg)
    io_like = [:read, :readline, :nil?, :eof?].all? { |msg| arg.respond_to? msg }
    return IOSource.new(arg) if io_like

    if arg.respond_to? :to_str
      require 'stringio'
      return IOSource.new(StringIO.new(arg))
    end

    return arg if arg.kind_of? Source

    raise "#{arg.class} is not a valid input stream. It must walk \n"+
      "like either a String, an IO, or a Source."
  end
end
# A Source can be searched for patterns, and wraps buffers and other
# objects and provides consumption of text
class Source
include Encoding
# The current buffer (what we're going to read next)
attr_reader :buffer
# The line number of the last consumed text
attr_reader :line
attr_reader :encoding
# Constructor
# @param arg must be a String, and should be a valid XML document; it
# becomes both the original text and the current buffer
# @param encoding if non-null, sets the encoding of the source to this
# value, overriding all encoding detection; otherwise the encoding is
# taken from check_encoding on the buffer
def initialize(arg, encoding=nil)
  @orig = @buffer = arg
  self.encoding = encoding || check_encoding( @buffer )
  @line = 0
end
# Inherited from Encoding
# Overridden to support optimized en/decoding.  Does nothing further
# when the inherited setter returns a false value.  Otherwise records
# the encoded form of '>' as the line break and either decodes the
# buffer (any encoding other than UTF_8) or tags it as UTF-8 where
# strings support force_encoding.
def encoding=(enc)
  return unless super
  @line_break = encode( '>' )
  if enc == UTF_8
    @to_utf = false
    if @buffer.respond_to? :force_encoding
      @buffer.force_encoding Encoding::UTF_8
    end
  else
    @buffer = decode(@buffer)
    @to_utf = true
  end
end
# Scans the source for a given pattern.  Note, that this is not your
# usual scan() method.  For one thing, the pattern argument has some
# requirements; for another, the source can be consumed.  You can easily
# confuse this method.  Originally, the patterns were easier to
# construct and this method more robust, because it generated search
# regexes on the fly; however, that was computationally expensive and
# slowed down the entire REXML package considerably, since this is by
# far the most commonly called method.
# @param pattern must be a Regexp, and must be in the form of
# /^\s*(#{your pattern, with no groups})(.*)/.  The first group
# will be returned; the second group is used if the consume flag is
# set.
# @param cons if true and something matched, the buffer is replaced by
# the text following the last match.
# @return the result of String#scan on the buffer, or nil if the buffer
# is nil.
def scan(pattern, cons=false)
  return nil if @buffer.nil?
  found = @buffer.scan(pattern)
  @buffer = Regexp.last_match.post_match if cons and found.size>0
  found
end
# No-op here: the method body is empty and returns nil.
def read; end
# Drops everything up to and including the first match of +pattern+
# from the buffer.  The buffer is left alone when nothing matches.
def consume( pattern )
  md = pattern.match( @buffer )
  @buffer = md.post_match if md
end
# Matches +pattern+ against the buffer without consuming anything.
# +char+ is not used by this implementation.
# @return the MatchData, or nil when there is no match
def match_to( char, pattern )
  pattern.match(@buffer)
end
# Matches +pattern+ against the buffer and replaces the buffer with the
# text after the match.  +char+ is not used by this implementation.
# When nothing matches, the buffer becomes nil.
# @return the MatchData, or nil when there is no match
def match_to_consume( char, pattern )
  md = pattern.match(@buffer)
  @buffer = md ? md.post_match : nil
  md
end
# Matches +pattern+ against the buffer.
# @param cons when true and the pattern matched, the buffer is replaced
# by the text after the match
# @return the MatchData, or nil when there is no match
def match(pattern, cons=false)
  md = pattern.match(@buffer)
  @buffer = md.post_match if cons and md
  md
end
# @return true if the Source is exhausted, i.e. the buffer equals the
# empty string
def empty?
  @buffer == ''
end
# @return the index at which the current buffer text occurs in the
# original text, or nil when it does not occur there
def position
  @orig.index( @buffer )
end
# @return the current line in the source
#
# NOTE(review): String#split without arguments splits on whitespace, so
# what is returned is the index of the whitespace-separated token equal
# to the first 31 characters of the buffer (nil when there is none),
# which is presumably not a real line number -- confirm against callers.
def current_line
  tokens = @orig.split
  hit = tokens.grep( @buffer[0..30] ).last
  hit ? tokens.index( hit ) : nil
end
end
# A Source that wraps an IO. See the Source class for method
# documentation
class IOSource < Source
#attr_reader :block_size
# block_size has been deprecated
# Wraps the IO-like object +arg+.
# @param arg the stream to read from (needs read, readline and eof?)
# @param block_size deprecated and not used
# @param encoding when given, it is applied before the first chunk is
# read
#
# An empty stream, or one that ends before the first line break, is
# accepted: the initial buffer is whatever could be read.
def initialize(arg, block_size=500, encoding=nil)
  @er_source = @source = arg
  @to_utf = false
  # Determining the encoding is a deceptively difficult issue to resolve.
  # First, we check the first two bytes for UTF-16.  Then we
  # assume that the encoding is at least ASCII enough for the '>', and
  # we read until we get one of those.  This gives us the XML declaration,
  # if there is one.  If there isn't one, the file MUST be UTF-8, as per
  # the XML spec.  If there is one, we can determine the encoding from
  # it.
  @buffer = ""
  # read(2) returns nil at end of input; fall back to an empty string so
  # the byte-order-mark checks below do not fail on nil.
  str = @source.read( 2 ) || ''
  if encoding
    self.encoding = encoding
  elsif str[0,2] == "\xfe\xff"
    @line_break = "\000>"
  elsif str[0,2] == "\xff\xfe"
    @line_break = ">\000"
  elsif str[0,2] == "\xef\xbb"
    # Third byte of a UTF-8 byte order mark; nil (end of input) is
    # treated as no byte.
    str += @source.read(1).to_s
    str = '' if (str[2,1] == "\xBF")
    @line_break = ">"
  else
    @line_break = ">"
  end
  # readline raises at end of input, so only call it when data remains.
  super( @source.eof? ? str : str+@source.readline( @line_break ) )
end
# Scans the buffer like Source#scan, but when nothing is found it keeps
# appending chunks from the underlying stream (one line break at a time)
# until the pattern matches or the stream is used up, then scans again.
# @return the scan result from the superclass
def scan(pattern, cons=false)
  rv = super
  # You'll notice that this next section is very similar to the same
  # section in match(), but just a liiittle different.  This is
  # because it is a touch faster to do it this way with scan()
  # than the way match() does it; enough faster to warrant duplicating
  # some code
  if rv.size == 0
    until @buffer =~ pattern or @source.nil?
      begin
        # READLINE OPT
        #str = @source.read(@block_size)
        str = @source.readline(@line_break)
        str = decode(str) if @to_utf and str
        @buffer << str
      rescue => e
        # Illegal byte sequences are still reported to the caller.  The
        # constant is only consulted when Iconv is loaded; naming it
        # directly in a rescue clause raises NameError when it is not.
        raise if defined?(Iconv::IllegalSequence) and e.kind_of?(Iconv::IllegalSequence)
        # Any other failure (typically end of input) ends the stream.
        @source = nil
      end
    end
    rv = super
  end
  # Object#taint is absent on newer Rubies.
  rv.taint if rv.respond_to? :taint
  rv
end
# Appends the next chunk of the stream (up to and including the next
# line break) to the buffer, decoding it first when the source is not
# UTF-8.  Any StandardError while reading -- typically end of input --
# marks the stream as finished by setting @source to nil.  Exceptions
# outside StandardError (interrupts, exit requests, ...) now propagate
# instead of being silently swallowed.
def read
  begin
    str = @source.readline(@line_break)
    str = decode(str) if @to_utf and str
    @buffer << str
    if not @to_utf and @buffer.respond_to? :force_encoding
      @buffer.force_encoding Encoding::UTF_8
    end
  rescue StandardError
    @source = nil
  end
end
# Consumes through the first match of +pattern+ by delegating to
# match() with the consume flag set.
def consume( pattern )
  match( pattern, true )
end
# Matches +pattern+ against the buffer, pulling further chunks from the
# stream (one line break at a time) until it matches or the stream is
# used up.
# @param cons when true and the pattern matched, the buffer is replaced
# by the text after the match
# @return the MatchData, or nil when the stream ran out first
def match( pattern, cons=false )
  rv = pattern.match(@buffer)
  @buffer = $' if cons and rv
  while !rv and @source
    begin
      str = @source.readline(@line_break)
      str = decode(str) if @to_utf and str
      @buffer << str
      rv = pattern.match(@buffer)
      @buffer = $' if cons and rv
    rescue
      # Any failure while reading (typically end of input) ends the
      # stream.
      @source = nil
    end
  end
  # Object#taint is absent on newer Rubies.
  rv.taint if rv.respond_to? :taint
  rv
end
# True when the buffer is empty (per the superclass) and the stream is
# either gone or at end of input.
def empty?
  super && ( @source.nil? || @source.eof? )
end
# Position reported by the original stream, or 0 when asking for it
# raises a StandardError.
def position
  begin
    @er_source.pos
  rescue StandardError
    0
  end
end
# @return the current line in the source
def current_line
begin
pos = @er_source.pos # The byte position in the source
lineno = @er_source.lineno # The XML < position in the source
@er_source.rewind
line = 0 # The \r\n position in the source
begin
while @er_source.pos < pos
@er_source.readline
line += 1
end
rescue
end
rescue IOError
pos = -1
line = -1
end
[pos, lineno, line]
end
end
end

View file

@ -0,0 +1,92 @@
module REXML
  # A template for stream parser listeners.  Every callback here has an
  # empty body and returns nil.
  #
  # Note that the declarations (attlistdecl, elementdecl, etc) are
  # trivially processed; REXML doesn't yet handle doctype entity
  # declarations, so you have to parse them out yourself.
  module StreamListener
    # Called when a tag is encountered.
    # @p name the tag name
    # @p attrs an array of arrays of attribute/value pairs, suitable for
    # use with assoc or rassoc.  IE, <tag attr1="value1" attr2="value2">
    # will result in
    # tag_start( "tag", [["attr1","value1"],["attr2","value2"]])
    def tag_start(name, attrs); end

    # Called when the end tag is reached.  In the case of <tag/>, tag_end
    # will be called immediately after tag_start
    # @p name the name of the tag
    def tag_end(name); end

    # Called when text is encountered in the document
    # @p text the text content.
    def text(text); end

    # Called when an instruction is encountered.  EG: <?xsl sheet='foo'?>
    # @p name the instruction name; in the example, "xsl"
    # @p instruction the rest of the instruction.  In the example,
    # "sheet='foo'"
    def instruction(name, instruction); end

    # Called when a comment is encountered.
    # @p comment The content of the comment
    def comment(comment); end

    # Handles a doctype declaration.  Any attributes of the doctype which
    # are not supplied will be nil.
    # EG, <!DOCTYPE me PUBLIC "foo" "bar">
    # @p name the name of the doctype; EG, "me"
    # @p pub_sys "PUBLIC", "SYSTEM", or nil.  EG, "PUBLIC"
    # @p long_name the supplied long name, or nil.  EG, "foo"
    # @p uri the uri of the doctype, or nil.  EG, "bar"
    def doctype(name, pub_sys, long_name, uri); end

    # Called when the doctype is done
    def doctype_end; end

    # If a doctype includes an ATTLIST declaration, it will cause this
    # method to be called.  The content is the declaration itself,
    # unparsed.  EG, <!ATTLIST el attr CDATA #REQUIRED> will come to this
    # method as "el attr CDATA #REQUIRED".  This is the same for all of
    # the .*decl methods.
    def attlistdecl(element_name, attributes, raw_content); end

    # <!ELEMENT ...>
    def elementdecl(content); end

    # <!ENTITY ...>
    # The argument passed to this method is an array of the entity
    # declaration.  It can be in a number of formats, but in general it
    # returns (example, result):
    #  <!ENTITY % YN '"Yes"'>
    #  ["%", "YN", "'\"Yes\"'", "\""]
    #  <!ENTITY % YN 'Yes'>
    #  ["%", "YN", "'Yes'", "s"]
    #  <!ENTITY WhatHeSaid "He said %YN;">
    #  ["WhatHeSaid", "\"He said %YN;\"", "YN"]
    #  <!ENTITY open-hatch SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "SYSTEM", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
    #  <!ENTITY open-hatch PUBLIC "-//Textuality//TEXT Standard open-hatch boilerplate//EN" "http://www.textuality.com/boilerplate/OpenHatch.xml">
    #  ["open-hatch", "PUBLIC", "\"-//Textuality//TEXT Standard open-hatch boilerplate//EN\"", "\"http://www.textuality.com/boilerplate/OpenHatch.xml\""]
    #  <!ENTITY hatch-pic SYSTEM "../grafix/OpenHatch.gif" NDATA gif>
    #  ["hatch-pic", "SYSTEM", "\"../grafix/OpenHatch.gif\"", "\n\t\t\t\t\t\t\tNDATA gif", "gif"]
    def entitydecl(content); end

    # <!NOTATION ...>
    def notationdecl(content); end

    # Called when %foo; is encountered in a doctype declaration.
    # @p content "foo"
    def entity(content); end

    # Called when <![CDATA[ ... ]]> is encountered in a document.
    # @p content "..."
    def cdata(content); end

    # Called when an XML PI is encountered in the document.
    # EG: <?xml version="1.0" encoding="utf"?>
    # @p version the version attribute value.  EG, "1.0"
    # @p encoding the encoding attribute value, or nil.  EG, "utf"
    # @p standalone the standalone attribute value, or nil.  EG, nil
    def xmldecl(version, encoding, standalone); end
  end
end

View file

@ -0,0 +1,32 @@
module REXML
  # Walks several indexable collections in parallel, yielding one "row"
  # (an array holding the i-th item of every collection) per step.
  # Collections shorter than the longest one contribute nil once they
  # run out.
  class SyncEnumerator
    include Enumerable

    # Creates a new SyncEnumerator which enumerates rows of given
    # Enumerable objects.  Each object must respond to size and [].
    # With no objects at all the enumerator is simply empty.
    def initialize(*enums)
      @gens = enums
      # max is nil for an empty list; treat that as zero rows so that
      # each() does not fail.
      @length = @gens.collect {|x| x.size }.max || 0
    end

    # Returns the number of enumerated Enumerable objects, i.e. the size
    # of each row.
    def size
      @gens.size
    end

    # Returns the number of enumerated Enumerable objects, i.e. the size
    # of each row.
    def length
      @gens.length
    end

    # Enumerates rows of the Enumerable objects.  Returns self.
    def each
      @length.times {|i|
        yield @gens.collect {|x| x[i]}
      }
      self
    end
  end
end

403
vendor/plugins/rexml/lib/rexml/text.rb vendored Normal file
View file

@ -0,0 +1,403 @@
require 'rexml/entity'
require 'rexml/doctype'
require 'rexml/child'
require 'rexml/doctype'
require 'rexml/parseexception'
module REXML
# Represents text nodes in an XML document
class Text < Child
include Comparable
# The order in which the substitutions occur.  write_with_substitution
# replaces each SPECIALS[i] match with SUBSTITUTES[i]; the first pattern
# only matches an ampersand that does not already start a reference.
SPECIALS = [ /&(?!#?[\w-]+;)/u, /</u, />/u, /"/u, /'/u, /\r/u ]
SUBSTITUTES = ['&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#13;']
# Characters which are substituted in written strings.  These are the
# mirror image of the tables above: read_with_substitution replaces
# each SETUTITSBUS[i] match with SLAICEPS[i].
SLAICEPS = [ '<', '>', '"', "'", '&' ]
SETUTITSBUS = [ /&lt;/u, /&gt;/u, /&quot;/u, /&apos;/u, /&amp;/u ]
# If +raw+ is true, then REXML leaves the value alone
attr_accessor :raw
# Pattern handed to Text.check for raw strings: matches a "<", or an
# "&" optionally followed by a named or numeric reference.
NEEDS_A_SECOND_CHECK = /(<|&((#{Entity::NAME});|(#0*((?:\d+)|(?:x[a-fA-F0-9]+)));)?)/um
# Matches a numeric character reference (decimal, or hex after "x");
# the capture excludes leading zeros.
NUMERICENTITY = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
# Code points (single values and ranges) that the XML 1.0 Char
# production allows: tab, line feed, carriage return, and everything
# from U+0020 up except the surrogate block and U+FFFE/U+FFFF.
VALID_CHAR = [
  0x9, 0xA, 0xD,
  (0x20..0xD7FF),
  (0xE000..0xFFFD),
  (0x10000..0x10FFFF)
]

# Matches text consisting only of characters listed in VALID_CHAR.
#
# NOTE(review): the pattern is anchored with ^ and $, which match per
# line, so a multi-line string matches as soon as one of its lines is
# clean -- confirm whether \A and \z were intended.
if String.method_defined? :encode
  # Encoding-aware Rubies: build a UTF-8 character class directly from
  # VALID_CHAR.  Integer (rather than Fixnum) keeps this working on
  # Rubies where Fixnum no longer exists.
  VALID_XML_CHARS = Regexp.new('^['+
    VALID_CHAR.map { |item|
      case item
      when Integer
        [item].pack('U').force_encoding('utf-8')
      when Range
        [item.first, '-'.ord, item.last].pack('UUU').force_encoding('utf-8')
      end
    }.join +
  ']*$')
else
  # Byte-oriented Rubies: spell out the valid UTF-8 byte sequences.  The
  # n flag makes the byte escapes literal bytes; without it the literal
  # is rejected when the source file is read as UTF-8.
  VALID_XML_CHARS = /^(
       [\x09\x0A\x0D\x20-\x7E]            # ASCII
     | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
     |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
     | [\xE1-\xEC\xEE][\x80-\xBF]{2}      # straight 3-byte
     |  \xEF[\x80-\xBE]{2}                #
     |  \xEF\xBF[\x80-\xBD]               # excluding U+fffe and U+ffff
     |  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
     |  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
     | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
     |  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
   )*$/nx;
end
# Constructor
# +arg+ if a String, the content is set to the String. If a Text,
# the object is shallowly cloned.
#
# +respect_whitespace+ (boolean, false) if true, whitespace is
# respected
#
# +parent+ (nil) if this is a Parent object, the parent
# will be set to this.
#
# +raw+ (nil) This argument can be given three values.
# If true, then the value used to construct this object is expected to
# contain no unescaped XML markup, and REXML will not change the text. If
# this value is false, the string may contain any characters, and REXML will
# escape any and all defined entities whose values are contained in the
# text. If this value is nil (the default), then the raw value of the
# parent will be used as the raw value for this node. If there is no raw
# value for the parent, and no value is supplied, the default is false.
# Use this field if you have entities defined for some text, and you don't
# want REXML to escape that text in output.
# Text.new( "<&", false, nil, false ) #-> "&lt;&amp;"
# Text.new( "&lt;&amp;", false, nil, false ) #-> "&amp;lt;&amp;amp;"
# Text.new( "<&", false, nil, true ) #-> Parse exception
# Text.new( "&lt;&amp;", false, nil, true ) #-> "&lt;&amp;"
# # Assume that the entity "s" is defined to be "sean"
# # and that the entity "r" is defined to be "russell"
# Text.new( "sean russell" ) #-> "&s; &r;"
# Text.new( "sean russell", false, nil, true ) #-> "sean russell"
#
# +entity_filter+ (nil) This can be an array of entities to match in the
# supplied text. This argument is only useful if +raw+ is set to false.
# Text.new( "sean russell", false, nil, false, ["s"] ) #-> "&s; russell"
# Text.new( "sean russell", false, nil, true, ["s"] ) #-> "sean russell"
# In the last example, the +entity_filter+ argument is ignored.
#
# +illegal+ INTERNAL USE ONLY
# Builds a text node from +arg+ (a String or another Text).
#
# Runs of spaces, newlines and tabs are squeezed unless
# +respect_whitespace+ is true, carriage returns are normalised to
# "\n", and when the node is raw and has a parent the text is validated
# with Text.check.
#
# Raises RuntimeError when +arg+ is neither a String nor a Text.
#
# NOTE(review): the +illegal+ parameter is accepted but the check always
# uses NEEDS_A_SECOND_CHECK -- confirm whether +illegal+ was meant to be
# passed on.
def initialize(arg, respect_whitespace=false, parent=nil, raw=nil,
  entity_filter=nil, illegal=NEEDS_A_SECOND_CHECK )

  @raw = false

  if parent
    super( parent )
    # Inherit the parent's raw setting unless one is given explicitly.
    @raw = parent.raw
  else
    @parent = nil
  end

  @raw = raw unless raw.nil?
  @entity_filter = entity_filter
  @normalized = @unnormalized = nil

  if arg.kind_of? String
    @string = arg.clone
    @string.squeeze!(" \n\t") unless respect_whitespace
  elsif arg.kind_of? Text
    @string = arg.to_s
    @raw = arg.raw
  else
    # Object#type no longer exists; #class reports the same thing.
    raise "Illegal argument of type #{arg.class} for Text constructor (#{arg})"
  end

  @string.gsub!( /\r\n?/, "\n" )

  Text.check(@string, NEEDS_A_SECOND_CHECK, doctype) if @raw and @parent
end
# Re-parents the node, then validates the text with Text.check when the
# node is raw and now has a parent.
def parent= parent
  super(parent)
  if @raw and @parent
    Text.check(@string, NEEDS_A_SECOND_CHECK, doctype)
  end
end
# Checks +string+ for content that may not appear in raw text.
#
# string:: the text to validate
# pattern:: regular expression locating "<" and "&..." candidates (see
#           NEEDS_A_SECOND_CHECK)
# doctype:: document type used to look up declared entities, or nil
#
# Raises RuntimeError naming the offending character or entity.
def Text.check string, pattern, doctype

  # illegal anywhere
  if string !~ VALID_XML_CHARS
    if String.method_defined? :encode
      string.chars.each do |c|
        case c.ord
        when *VALID_CHAR
        else
          raise "Illegal character #{c.inspect} in raw string \"#{string}\""
        end
      end
    else
      # The n flag makes the byte escapes literal bytes; without it the
      # literal is rejected when the source file is read as UTF-8.
      string.scan(/[\x00-\x7F]|[\x80-\xBF][\xC0-\xF0]*|[\xC0-\xF0]/n) do |c|
        case c.unpack('U')
        when *VALID_CHAR
        else
          raise "Illegal character #{c.inspect} in raw string \"#{string}\""
        end
      end
    end
  end

  # context sensitive
  # The block form of scan sets $1..$5 for every match.  Iterating over
  # the array returned by a block-less scan left them describing only
  # the last match, so earlier problems went unnoticed.
  string.scan(pattern) do
    if $1[-1] != ?;
      raise "Illegal character '#{$1}' in raw string \"#{string}\""
    elsif $1[0] == ?&
      if $5 and $5[0] == ?#
        case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
        when *VALID_CHAR
        else
          raise "Illegal character '#{$1}' in raw string \"#{string}\""
        end
      elsif $3 and !SUBSTITUTES.include?($1)
        if !doctype or !doctype.entities.has_key?($3)
          raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
        end
      end
    end
  end
end
# Identifies this node as text.
def node_type; :text; end
# True when the stored string has no characters.
def empty?
  @string.size.zero?
end
# Returns a new Text constructed from this one.
def clone
  Text.new(self)
end
# Appends text to this text node.  The text is appended in the +raw+ mode
# of this text node.  Carriage returns in +to_append+ are normalised to
# "\n" first.
#
# The cached results of to_s and value are discarded, so the next call
# to either reflects the appended text.
#
# Returns the node's string after the append.
def <<( to_append )
  appended = @string << to_append.gsub( /\r\n?/, "\n" )
  @normalized = nil
  @unnormalized = nil
  appended
end
# +other+ a String or a Text
# +returns+ the result of (to_s <=> arg.to_s)
def <=>( other )
  mine = to_s
  mine <=> other.to_s
end
# The doctype of the document this node's parent belongs to; nil when
# there is no parent or the parent has no document.
def doctype
  return nil unless @parent
  doc = @parent.document
  doc ? doc.doctype : nil
end
# Entity::REFERENCE compiled as a Regexp; Text::unnormalize uses it to
# find the references it replaces.
REFERENCE = /#{Entity::REFERENCE}/
# Returns the string value of this text node.  This string is always
# escaped, meaning that it is a valid XML text node string, and all
# entities that can be escaped, have been inserted.  This method respects
# the entity filter set in the constructor.
#
# A raw node returns its stored string untouched; otherwise the escaped
# form is computed once and cached.
#
#   # Assume that the entity "s" is defined to be "sean", and that the
#   # entity "r" is defined to be "russell"
#   t = Text.new( "< & sean russell", false, nil, false, ['s'] )
#   t.to_s   #-> "&lt; &amp; &s; russell"
#   t = Text.new( "< & &s; russell", false, nil, false )
#   t.to_s   #-> "&lt; &amp; &s; russell"
#   u = Text.new( "sean russell", false, nil, true )
#   u.to_s   #-> "sean russell"
def to_s
  return @string if @raw
  @normalized ||= Text::normalize( @string, doctype, @entity_filter )
end
# The inspect form of the stored string.
def inspect
  @string.inspect
end
# Returns the string value of this text.  This is the text without
# entities, as it might be used programmatically, or printed to the
# console.  This ignores the 'raw' attribute setting, and any
# entity_filter.  The result is computed once and cached.
#
#   # Assume that the entity "s" is defined to be "sean", and that the
#   # entity "r" is defined to be "russell"
#   t = Text.new( "< & sean russell", false, nil, false, ['s'] )
#   t.value   #-> "< & sean russell"
#   t = Text.new( "< & &s; russell", false, nil, false )
#   t.value   #-> "< & sean russell"
#   u = Text.new( "sean russell", false, nil, true )
#   u.value   #-> "sean russell"
def value
  @unnormalized ||= Text::unnormalize( @string, doctype )
end
# Sets the contents of this text node.  This expects the text to be
# unnormalized.  Carriage returns are normalised to "\n", both cached
# string forms are discarded, and the node stops being raw.
#
#   e = Element.new( "a" )
#   e.add_text( "foo" )      # <a>foo</a>
#   e[0].value = "bar"       # <a>bar</a>
#   e[0].value = "<a>"       # <a>&lt;a&gt;</a>
def value=( val )
  @string = val.gsub( /\r\n?/, "\n" )
  @normalized = @unnormalized = nil
  @raw = false
end
# Recursively wraps +string+ so that lines break at a space at or
# before +width+.
#
# string:: the text to wrap
# width:: the column at which to look for a break
# addnewline:: when true, the result also starts with a newline
#
# A word longer than +width+ cannot be broken: the line is then cut at
# the first space after it, and text without any space is returned
# unchanged (this used to raise TypeError).
def wrap(string, width, addnewline=false)
  return string if string.length <= width
  # Last space at or before the cutoff, else the first one after it.
  place = string.rindex(' ', width) || string.index(' ', width)
  return string unless place
  wrapped = string[0,place] + "\n" + wrap(string[place+1..-1], width)
  addnewline ? "\n" + wrapped : wrapped
end
# Prefixes every line of +string+ with +style+ repeated +level+ times.
#
# string:: the text to indent
# level:: how many copies of +style+ to prepend; a negative level
#         returns +string+ untouched
# style:: the indentation unit
# indentfirstline:: when false, leading and trailing whitespace is
#                   stripped from the result
#
# Trailing whitespace, including the line terminator, is removed from
# each line before it is appended, so the lines end up joined together.
#
# Iterates with each_line, which exists on every Ruby version
# (String#each does not).
def indent_text(string, level=1, style="\t", indentfirstline=true)
  return string if level < 0
  indent_string = style * level
  new_string = ''
  string.each_line { |line|
    new_string << (indent_string + line).sub(/[\s]+$/,'')
  }
  new_string.strip! unless indentfirstline
  return new_string
end
# == DEPRECATED
# See REXML::Formatters
#
# Emits a deprecation warning, then writes this node to +writer+ using
# REXML::Formatters::Pretty when +indent+ is greater than -1 and
# REXML::Formatters::Default otherwise.  +transitive+ and +ie_hack+ are
# not used.
def write( writer, indent=-1, transitive=false, ie_hack=false )
  Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters")
  formatter = (indent > -1) ? REXML::Formatters::Pretty.new( indent ) : REXML::Formatters::Default.new
  formatter.write( self, writer )
end
# FIXME
# This probably won't work properly
#
# Returns the parent's xpath with "/text()" appended.
def xpath
  @parent.xpath + "/text()"
end
# Writes out text, substituting special characters beforehand.
# +out+ A String, IO, or any other object supporting <<( String )
# +input+ the text to substitute and then write out; it is not modified
#
# Each pattern in SPECIALS is replaced, in order, by the entry at the
# same position in SUBSTITUTES.
#
#   z=utf8.unpack("U*")
#   ascOut=""
#   z.each{|r|
#     if r < 0x100
#       ascOut.concat(r.chr)
#     else
#       ascOut.concat(sprintf("&#x%x;", r))
#     end
#   }
#   puts ascOut
def write_with_substitution out, input
  escaped = input.clone
  SPECIALS.zip( SUBSTITUTES ) { |special, substitute|
    escaped.gsub!( special, substitute )
  }
  out << escaped
end
# Reads text, substituting entities
#
# input:: the text to convert; it is not modified
# illegal:: optional pattern; when it matches the text a ParseException
#           is raised
#
# Returns a copy of +input+ with carriage returns normalised to "\n",
# the five predefined entity references (SETUTITSBUS) replaced by their
# characters (SLAICEPS), and numeric character references decoded.
# Numeric references are located with NUMERICENTITY, so hexadecimal
# references written with upper-case digits are decoded as well.
def Text::read_with_substitution( input, illegal=nil )
  copy = input.clone

  if copy =~ illegal
    raise ParseException.new( "malformed text: Illegal character #$& in \"#{copy}\"" )
  end if illegal

  copy.gsub!( /\r\n?/, "\n" )
  if copy.include? ?&
    copy.gsub!( SETUTITSBUS[0], SLAICEPS[0] )
    copy.gsub!( SETUTITSBUS[1], SLAICEPS[1] )
    copy.gsub!( SETUTITSBUS[2], SLAICEPS[2] )
    copy.gsub!( SETUTITSBUS[3], SLAICEPS[3] )
    copy.gsub!( SETUTITSBUS[4], SLAICEPS[4] )
    copy.gsub!( NUMERICENTITY ) {|m|
      m=$1
      #m='0' if m==''
      # "x41" becomes "0x41" so that Integer() reads it as hexadecimal.
      m = "0#{m}" if m[0] == ?x
      [Integer(m)].pack('U*')
    }
  end
  copy
end
# Matches an ampersand that is not the start of a named entity
# reference.
EREFERENCE = /&(?!#{Entity::NAME};)/
# Escapes all possible entities
#
# input:: the text to escape (converted with to_s)
# doctype:: when given, its entities are used; otherwise
#           DocType::DEFAULT_ENTITIES
# entity_filter:: optional collection; a doctype entity is skipped when
#                 the collection includes it
#
# Every "&" is first replaced by "&amp;"; then each entity's value is
# replaced by a reference to that entity.  Returns the escaped copy.
def Text::normalize( input, doctype=nil, entity_filter=nil )
  escaped = input.to_s.gsub( "&", "&amp;" )
  if doctype
    doctype.entities.each_value do |entity|
      next unless entity.value
      next if entity_filter and entity_filter.include?(entity)
      escaped = escaped.gsub( entity.value, "&#{entity.name};" )
    end
  else
    DocType::DEFAULT_ENTITIES.each_value do |entity|
      escaped = escaped.gsub(entity.value, "&#{entity.name};" )
    end
  end
  escaped
end
# Unescapes all possible entities
#
# string:: the text to unescape
# doctype:: when given, named references are resolved through
#           doctype.entity; otherwise through DocType::DEFAULT_ENTITIES
# filter:: optional collection of entity names to leave untouched
# illegal:: not used
#
# Carriage returns are normalised to "\n".  Numeric references become
# the character they name, "&amp;" becomes "&", and a named reference
# that cannot be resolved is left as it is.
def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil )
  string.gsub( /\r\n?/, "\n" ).gsub( REFERENCE ) { |ref|
    name = ref[1...-1]
    if ref[1] == ?#
      digits, base = (ref[2] == ?x) ? [ref[3...-1], 16] : [ref[2...-1], 10]
      [digits.to_i(base)].pack('U*')
    elsif ref == '&amp;'
      '&'
    elsif filter and filter.include?( name )
      ref
    elsif doctype
      doctype.entity( name ) or ref
    else
      default = DocType::DEFAULT_ENTITIES[ name ]
      default ? default.value : ref
    end
  }
end
end
end

View file

@ -0,0 +1,8 @@
require 'rexml/parseexception'
module REXML
  # ParseException whose message names an undefined namespace prefix.
  class UndefinedNamespaceException < ParseException
    # prefix:: the prefix to name in the message
    # source, parser:: accepted but not used by this constructor
    def initialize( prefix, source, parser )
      message = "Undefined prefix #{prefix} found"
      super( message )
    end
  end
end

View file

@ -0,0 +1,559 @@
require "rexml/validation/validation"
require "rexml/parsers/baseparser"
module REXML
module Validation
# Implemented:
# * empty
# * element
# * attribute
# * text
# * optional
# * choice
# * oneOrMore
# * zeroOrMore
# * group
# * value
# * interleave
# * mixed
# * ref
# * grammar
# * start
# * define
#
# Not implemented:
# * data
# * param
# * include
# * externalRef
# * notAllowed
# * anyName
# * nsName
# * except
# * name
class RelaxNG
include Validator
INFINITY = 1.0 / 0.0
EMPTY = Event.new( nil )
TEXT = [:start_element, "text"]
attr_accessor :current
attr_accessor :count
attr_reader :references
# FIXME: Namespaces
#
# Reads a RELAX NG schema from +source+ with a pull parser and turns it
# into a tree of State objects rooted at a Sequence.  The named patterns
# found in <define> elements are collected in @references.
def initialize source
parser = REXML::Parsers::BaseParser.new( source )
@count = 0
@references = {}
@root = @current = Sequence.new(self)
@root.previous = true
# Stack of the containers currently being filled; the innermost one is
# last.
states = [ @current ]
# Pull events until the end of the document has been processed.
begin
event = parser.pull
case event[0]
when :start_element
case event[1]
when "empty"
when "element", "attribute", "text", "value"
states[-1] << event
# Each container pattern below pushes a new state and registers it with
# the state that encloses it.
when "optional"
states << Optional.new( self )
states[-2] << states[-1]
when "choice"
states << Choice.new( self )
states[-2] << states[-1]
when "oneOrMore"
states << OneOrMore.new( self )
states[-2] << states[-1]
when "zeroOrMore"
states << ZeroOrMore.new( self )
states[-2] << states[-1]
when "group"
states << Sequence.new( self )
states[-2] << states[-1]
when "interleave"
states << Interleave.new( self )
states[-2] << states[-1]
when "mixed"
# mixed is handled as an interleave that also contains text.
states << Interleave.new( self )
states[-2] << states[-1]
states[-1] << TEXT
when "define"
# A definition is gathered in a plain array whose first item is its
# name.
states << [ event[2]["name"] ]
when "ref"
states[-1] << Ref.new( event[2]["name"] )
when "anyName"
states << AnyName.new( self )
states[-2] << states[-1]
# The remaining element names are recognised but ignored.
when "nsName"
when "except"
when "name"
when "data"
when "param"
when "include"
when "grammar"
when "start"
when "externalRef"
when "notAllowed"
end
when :end_element
case event[1]
when "element", "attribute"
states[-1] << event
when "zeroOrMore", "oneOrMore", "choice", "optional",
"interleave", "group", "mixed"
states.pop
when "define"
# Store the gathered events under the definition's name.
ref = states.pop
@references[ ref.shift ] = ref
#when "empty"
end
when :end_document
states[-1] << event
when :text
states[-1] << event
end
end while event[0] != :end_document
end
# Hands an incoming event to validate.
def receive(event)
  validate( event )
end
end
class State
# Sets up an empty state.
# context:: supplies the shared references table and a counter; the
#           counter is incremented and its new value becomes this
#           state's number
def initialize( context )
  @count = (context.count += 1)
  @references = context.references
  @events = []
  @previous = []
  @current = 0
  @value = false
end
# Rewinds this state to its first event and resets every nested State.
# Does nothing when the state is already at its first event.
def reset
  return if @current == 0
  @current = 0
  @events.each do |item|
    item.reset if item.kind_of? State
  end
end
# Pushes +previous+ onto the stack of states to return to.
def previous=( previous )
  @previous.push( previous )
end
# Advances this state with +event+ and returns the state that should
# receive the following event, or nil when +event+ does not match.
#
# * When this state has no event left, the call is passed to the most
#   recent previous state.
# * A Ref at the current position is expanded first.
# * A nested State at the current position takes over.
# * Otherwise the current event must match; afterwards the previous
#   state is returned when the list is finished, a following nested
#   State is entered, or self is returned.
def next( event )
  return @previous.pop.next( event ) if @events[@current].nil?
  expand_ref_in( @events, @current ) if @events[@current].class == Ref

  candidate = @events[@current]
  if candidate.kind_of? State
    @current += 1
    candidate.previous = self
    return candidate.next( event )
  end

  return nil unless candidate.matches?( event )

  @current += 1
  following = @events[@current]
  return @previous.pop if following.nil?
  return self unless following.kind_of? State

  @current += 1
  following.previous = self
  following
end
# Abbreviated label: the first letter of the class name (the part after
# the last "::") followed by "." and this state's number.
def to_s
  # Abbreviated:
  md = /(?:::)(\w)\w+$/.match( self.class.name )
  # Full:
  #md = /(?:::)(\w+)$/.match( self.class.name )
  initial = md ? md[1] : nil
  "#{initial}.#{@count}"
end
# Debug view: this state's label followed by its events, with the
# current event prefixed by "#".  An event that is this state itself is
# left blank.
def inspect
  label = to_s
  rendered = @events.collect do |e|
    marker = e == @events[@current] ? '#' : ''
    self == e ? nil : marker + e.inspect
  end
  "< #{label} #{rendered.join(', ')} >"
end
# The event this state is waiting for, wrapped in an array.
def expected
  [ @events[@current] ]
end
# Adds +event+ to this state's event list via add_event_to_arry.
def <<( event )
  add_event_to_arry( @events, event )
end
protected
# Replaces the reference at arry[ind] with the events recorded under
# its name in @references, each added through add_event_to_arry.
def expand_ref_in( arry, ind )
  expansion = @references[ arry[ind].to_s ].inject( [] ) do |collected, evt|
    add_event_to_arry( collected, evt )
    collected
  end
  arry[ind,1] = expansion
end
# Converts +evt+ with generate_event and adds the result to +arry+.
# A String result is not added: it becomes the argument of the last
# entry when that entry is an Event and a value is pending, and the
# pending-value flag is cleared either way.
def add_event_to_arry( arry, evt )
  generated = generate_event( evt )
  if !generated.kind_of?( String )
    arry << generated
  else
    last = arry[-1]
    last.event_arg = generated if last.kind_of? Event and @value
    @value = false
  end
end
# Translates a parser event from the schema into what the state machine
# stores.
#
# * State and Ref objects are returned unchanged.
# * A :text event yields its text (a String).
# * :end_document yields an Event of that type.
# * <element>/<attribute> start tags yield :start_element or
#   :start_attribute Events carrying the "name" attribute.
# * <text> and <value> start tags yield a :text Event; <value> also
#   marks a value as pending.
# * Any other event is treated as an end tag, yielding :end_element or
#   :end_attribute for <element>/<attribute>.
# Unrecognised tags yield an Event with a nil type.
def generate_event( event )
  return event if event.kind_of?( State ) || event.class == Ref

  kind, tag = event[0], event[1]
  return tag if kind == :text
  return Event.new( kind ) if kind == :end_document

  evt = nil
  arg = nil
  if kind == :start_element
    case tag
    when "element", "attribute"
      evt = (tag == "element") ? :start_element : :start_attribute
      arg = event[2]["name"]
    when "text"
      evt = :text
    when "value"
      evt = :text
      @value = true
    end
  else # then :end_element
    evt = :end_element if tag == "element"
    evt = :end_attribute if tag == "attribute"
  end
  Event.new( evt, arg )
end
end
# A State whose match test is simply that of the event currently awaited.
class Sequence < State
  # True when the event at @current matches +event+.
  def matches?( event )
    awaited = @events[@current]
    awaited.matches?( event )
  end
end
# A State that may be skipped entirely: while still at its first event, an
# event it cannot match is forwarded to the state popped from @previous.
class Optional < State
  # Tries the regular State#next first. If that fails while nothing has
  # been consumed yet (@current == 0), pops the previous state into @prior
  # and lets it handle +event+ instead.
  def next( event )
    return super unless @current == 0
    outcome = super
    return outcome if outcome
    @prior = @previous.pop
    @prior.next( event )
  end

  # Matches when the awaited event matches or, before anything has been
  # consumed, when the state on top of @previous matches.
  def matches?( event )
    return true if @events[@current].matches?( event )
    @current == 0 and @previous[-1].matches?( event )
  end

  # At the start, what @prior expects is acceptable too.
  def expected
    if @current == 0
      [ @prior.expected, @events[0] ].flatten
    else
      [ @events[@current] ]
    end
  end
end
# An Optional whose event list may repeat: reaching the end of the list
# rewinds @current to 0 instead of returning to the previous state.
class ZeroOrMore < Optional
  # Returns +self+ (possibly rewound), a nested State, the result of
  # forwarding to the popped previous state, or nil on a mid-list mismatch.
  def next( event )
    expand_ref_in( @events, @current ) if @events[@current].class == Ref
    unless @events[@current].matches?( event )
      # The previous state is popped even when we end up returning nil.
      @prior = @previous.pop
      return @current == 0 ? @prior.next( event ) : nil
    end
    @current += 1
    upcoming = @events[@current]
    if upcoming.nil?
      @current = 0
      return self
    end
    return self unless upcoming.kind_of?( State )
    @current += 1
    upcoming.previous = self
    upcoming
  end

  # At the start of a repetition, what @prior expects is acceptable too.
  def expected
    if @current == 0
      [ @prior.expected, @events[0] ].flatten
    else
      [ @events[@current] ]
    end
  end
end
# A repeating State that must match at least once. @ord counts the events
# matched since the last reset; leaving the loop is only allowed once it is
# positive and the state sits at the start of its list.
class OneOrMore < State
  def initialize( context )
    super
    @ord = 0
  end

  def reset
    super
    @ord = 0
  end

  # Returns +self+ (rewound to 0 at the end of the list), a nested State,
  # the result of forwarding to the popped previous state, or nil.
  def next( event )
    expand_ref_in( @events, @current ) if @events[@current].class == Ref
    unless @events[@current].matches?( event )
      return @previous.pop.next( event ) if @current == 0 && @ord > 0
      return nil
    end
    @current += 1
    @ord += 1
    upcoming = @events[@current]
    if upcoming.nil?
      @current = 0
      return self
    end
    return self unless upcoming.kind_of?( State )
    @current += 1
    upcoming.previous = self
    upcoming
  end

  # Matches the awaited event, or whatever the previous state matches once
  # the loop may legally end.
  def matches?( event )
    return true if @events[@current].matches?( event )
    @current == 0 and @ord > 0 and @previous[-1].matches?( event )
  end

  def expected
    may_exit = ( @current == 0 and @ord > 0 )
    return [ @events[@current] ] unless may_exit
    [ @previous[-1].expected, @events[0] ].flatten
  end
end
# A State offering several alternatives. Each alternative is an Array of
# events kept in @choices; once an incoming event selects one, that Array
# becomes @events and processing continues through the inherited next.
class Choice < State
def initialize context
super
@choices = []
end
# Forgets the selected alternative and resets every State found inside
# any alternative.
def reset
super
@events = []
@choices.each { |c| c.each { |s| s.reset if s.kind_of? State } }
end
# Adds an event to the alternatives (not to @events) using this class's
# own add_event_to_arry.
def <<( event )
add_event_to_arry( @choices, event )
end
# Selects an alternative on first use, then defers to the inherited next.
# Returns nil (leaving @events empty) when no alternative starts with a
# match for +event+.
def next( event )
# Make the choice if we haven't
if @events.size == 0
c = 0 ; max = @choices.size
while c < max
# An alternative that starts with a Ref is expanded in place; the
# resulting entries are appended to @choices and the expanded
# alternative is removed, so +c+ is not advanced and +max+ shrinks.
if @choices[c][0].class == Ref
expand_ref_in( @choices[c], 0 )
@choices += @choices[c]
@choices.delete( @choices[c] )
max -= 1
else
c += 1
end
end
@events = @choices.find { |evt| evt[0].matches? event }
# Remove the references
# Find the events
end
#puts "In next with #{event.inspect}."
#puts "events is #{@events.inspect}"
# find returned nil: nothing matched, restore the "no choice made" state.
unless @events
@events = []
return nil
end
#puts "current = #@current"
super
end
# With an alternative selected, tests its awaited event; otherwise tests
# whether any alternative's first event matches.
def matches?( event )
return @events[@current].matches?( event ) if @events.size > 0
!@choices.find{|evt| evt[0].matches?(event)}.nil?
end
# With an alternative selected, the awaited event; otherwise the first
# item of every alternative (asking nested States what they expect).
def expected
#puts "IN CHOICE EXPECTED"
#puts "EVENTS = #{@events.inspect}"
return [@events[@current]] if @events.size > 0
return @choices.collect do |x|
if x[0].kind_of? State
x[0].expected
else
x[0]
end
end.flatten
end
def inspect
"< #{to_s} #{@choices.collect{|e| e.collect{|f|f.to_s}.join(', ')}.join(' or ')} >"
end
protected
# Unlike the inherited version, +arry+ here is a list of alternatives:
# * a State or Ref becomes a new one-element alternative
# * a :text event sets the event_arg of the last Event of the last
#   alternative, but only if that Event is of type :text and @value is set
# * a :start_element event opens a new alternative; the generated event
#   (for this and any other event type) is appended to the last alternative
def add_event_to_arry( arry, evt )
if evt.kind_of? State or evt.class == Ref
arry << [evt]
elsif evt[0] == :text
if arry[-1] and
arry[-1][-1].kind_of?( Event ) and
arry[-1][-1].event_type == :text and @value
arry[-1][-1].event_arg = evt[1]
@value = false
end
else
arry << [] if evt[0] == :start_element
arry[-1] << generate_event( evt )
end
end
end
# A Choice in which every alternative is consumed, in whatever order the
# events arrive. @choice is the number of alternatives already taken;
# taken alternatives are swapped to the front of @choices so that
# @choices[@choice..-1] is always the set still outstanding.
class Interleave < Choice
def initialize context
super
@choice = 0
end
# NOTE(review): unlike Choice#reset this does not call super, so @events
# and any nested States are left as they were - confirm this is intended.
def reset
@choice = 0
end
# Picks the outstanding alternative whose first event matches +event+,
# makes it the current series (@events, with @current rewound to 0) and
# moves it into the "taken" prefix of @choices. Leaves @events empty when
# nothing matches.
def next_current( event )
# Expand references
c = 0 ; max = @choices.size
while c < max
if @choices[c][0].class == Ref
expand_ref_in( @choices[c], 0 )
@choices += @choices[c]
@choices.delete( @choices[c] )
max -= 1
else
c += 1
end
end
@events = @choices[@choice..-1].find { |evt| evt[0].matches? event }
@current = 0
if @events
# reorder the choices
old = @choices[@choice]
idx = @choices.index( @events )
@choices[@choice] = @events
@choices[idx] = old
@choice += 1
end
#puts "In next with #{event.inspect}."
#puts "events is #{@events.inspect}"
@events = [] unless @events
end
# Same contract as the base next, except that finishing one series returns
# +self+ while alternatives remain outstanding, and only returns to the
# previous state once all have been taken.
def next( event )
# Find the next series
next_current(event) unless @events[@current]
return nil unless @events[@current]
expand_ref_in( @events, @current ) if @events[@current].class == Ref
#puts "In next with #{event.inspect}."
#puts "Next (#@current) is #{@events[@current]}"
if ( @events[@current].kind_of? State )
@current += 1
@events[@current-1].previous = self
return @events[@current-1].next( event )
end
#puts "Current isn't a state"
return @previous.pop.next( event ) if @events[@current].nil?
if ( @events[@current].matches?(event) )
@current += 1
if @events[@current].nil?
#puts "#{inspect[0,5]} 1RETURNING self" unless @choices[@choice].nil?
return self unless @choices[@choice].nil?
#puts "#{inspect[0,5]} 1RETURNING #{@previous[-1].inspect[0,5]}"
return @previous.pop
elsif @events[@current].kind_of? State
@current += 1
#puts "#{inspect[0,5]} 2RETURNING (#{@current-1}) #{@events[@current-1].inspect[0,5]}; on return, next is #{@events[@current]}"
@events[@current-1].previous = self
return @events[@current-1]
else
#puts "#{inspect[0,5]} RETURNING self w/ next(#@current) = #{@events[@current]}"
return self
end
else
return nil
end
end
# Inside a series, tests its awaited event; between series, tests the
# first event of every outstanding alternative.
def matches?( event )
return @events[@current].matches?( event ) if @events[@current]
!@choices[@choice..-1].find{|evt| evt[0].matches?(event)}.nil?
end
# Inside a series, the awaited event; between series, the first item of
# every outstanding alternative.
def expected
#puts "IN CHOICE EXPECTED"
#puts "EVENTS = #{@events.inspect}"
return [@events[@current]] if @events[@current]
return @choices[@choice..-1].collect do |x|
if x[0].kind_of? State
x[0].expected
else
x[0]
end
end.flatten
end
def inspect
"< #{to_s} #{@choices.collect{|e| e.collect{|f|f.to_s}.join(', ')}.join(' and ')} >"
end
end
# Placeholder for a named reference; it only carries the name.
class Ref
  def initialize( value )
    @value = value
  end

  # The referenced name, exactly as given to the constructor.
  def to_s
    @value
  end

  # The name wrapped in braces, e.g. "{name}".
  def inspect
    "{#{to_s}}"
  end
end
end
end

View file

@ -0,0 +1,155 @@
require 'rexml/validation/validationexception'
module REXML
module Validation
# Mixin that drives a state machine over parse events. The including
# object is expected to provide @root (the start state); @current tracks
# the state that will receive the next event.
module Validator
NILEVENT = [ nil ]
# Rewinds validation to the root state and clears the attribute stack.
# Returns self.
def reset
@current = @root
@root.reset
@root.previous = true
@attr_stack = []
self
end
# Prints the inspect form of the root state to standard output.
def dump
puts @root.inspect
end
# Validates one parse event (an Array: type, then type-specific data).
#
# Raises ValidationException when the current state rejects the event, or
# when an element ends while attributes of it are still unconsumed.
# For :start_element the attribute Hash in event[2] is pushed on
# @attr_stack and then emptied of every attribute the grammar accepts -
# note that this mutates the caller's Hash.
def validate( event )
#puts "Current: #@current"
#puts "Event: #{event.inspect}"
@attr_stack = [] unless defined? @attr_stack
match = @current.next(event)
raise ValidationException.new( "Validation error. Expected: "+
@current.expected.join( " or " )+" from #{@current.inspect} "+
" but got #{Event.new( event[0], event[1] ).inspect}" ) unless match
@current = match
# Check for attributes
case event[0]
when :start_element
#puts "Checking attributes"
@attr_stack << event[2]
# Do-while loop: each pass looks for one attribute the grammar accepts
# right now, removes it from the Hash, and repeats until a pass finds none.
begin
# Scratch events, reused for every attribute tried.
sattr = [:start_attribute, nil]
eattr = [:end_attribute]
text = [:text, nil]
k,v = event[2].find { |key,value|
sattr[1] = key
#puts "Looking for #{sattr.inspect}"
m = @current.next( sattr )
#puts "Got #{m.inspect}"
if m
# If the state has text children...
#puts "Looking for #{eattr.inspect}"
#puts "Expect #{m.expected}"
if m.matches?( eattr )
#puts "Got end"
@current = m
else
#puts "Didn't get end"
text[1] = value
#puts "Looking for #{text.inspect}"
m = m.next( text )
#puts "Got #{m.inspect}"
text[1] = nil
# NOTE(review): +return+ inside this block exits validate itself with
# false rather than just rejecting this attribute - confirm intended.
return false unless m
@current = m if m
end
m = @current.next( eattr )
if m
@current = m
true
else
false
end
else
false
end
}
event[2].delete(k) if k
end while k
when :end_element
# Whatever is left in the Hash was never accepted by the grammar.
attrs = @attr_stack.pop
raise ValidationException.new( "Validation error. Illegal "+
" attributes: #{attrs.inspect}") if attrs.length > 0
end
end
end
# One expected parse event: a type symbol plus an optional argument (an
# element/attribute name, or required text).
class Event
  attr_reader :event_type
  attr_accessor :event_arg

  def initialize( event_type, event_arg=nil )
    @event_type = event_type
    @event_arg = event_arg
  end

  # Returns @done, which this class never assigns.
  def done?
    @done
  end

  # True for every type except :start_element and :start_attribute.
  def single?
    !( @event_type == :start_element or @event_type == :start_attribute )
  end

  # Tests a raw parse event (Array: type, argument) against this one.
  #
  # Returns false when the types differ or the type is not handled, true on
  # a match, and nil when a start element/attribute has a different name.
  # A :text expectation without an argument accepts any text.
  def matches?( event )
    kind = event[0]
    return false unless kind == @event_type
    case kind
    when nil, :end_element, :end_attribute, :end_document
      true
    when :start_element, :start_attribute
      # Deliberately nil, not false, on a name mismatch.
      true if event[1] == @event_arg
    when :text
      @event_arg.nil? or @event_arg == event[1]
    else
      # Processing instructions, declarations and doctype events never match.
      false
    end
  end

  def ==( other )
    return false unless other.kind_of? Event
    same_type = ( @event_type == other.event_type )
    same_type and @event_arg == other.event_arg
  end

  def to_s
    inspect
  end

  def inspect
    "#{@event_type.inspect}( #@event_arg )"
  end
end
end
end

View file

@ -0,0 +1,9 @@
module REXML
module Validation
# Raised when a document does not conform to the grammar being validated.
class ValidationException < RuntimeError
  # msg:: description of the violation (required).
  def initialize( msg )
    super( msg )
  end
end
end
end

View file

@ -0,0 +1,119 @@
require 'rexml/encoding'
require 'rexml/source'
module REXML
# Represents the XML declaration of a document (<?xml version=... ?>):
# version, encoding and the optional standalone flag.
#
# Two flags control output: +writethis+ (whether the declaration is written
# at all) and +writeencoding+ (whether the encoding pseudo-attribute is
# written).
class XMLDecl < Child
  include Encoding

  DEFAULT_VERSION = "1.0"
  DEFAULT_ENCODING = "UTF-8"
  DEFAULT_STANDALONE = "no"
  # Regexp-source fragments; the backslash is stripped before writing.
  START = '<\?xml'
  STOP = '\?>'

  attr_accessor :version, :standalone
  attr_reader :writeencoding, :writethis

  # version::
  #   The XML version string, or another XMLDecl to copy.
  # encoding::
  #   Encoding name; nil means UTF-8 without writing the attribute.
  # standalone::
  #   Value for the standalone pseudo-attribute, or nil to omit it.
  def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
    @writethis = true
    @writeencoding = !encoding.nil?
    if version.kind_of? XMLDecl
      super()
      @version = version.version
      self.encoding = version.encoding
      # encoding= switches both write flags on, so restore the source's
      # settings afterwards. Without copying writethis, cloning a suppressed
      # declaration (see XMLDecl.default) produced one that is written out.
      @writeencoding = version.writeencoding
      @writethis = version.writethis
      @standalone = version.standalone
    else
      super()
      @version = version
      self.encoding = encoding
      @standalone = standalone
    end
    @version = DEFAULT_VERSION if @version.nil?
  end

  # Returns a copy carrying the same version, encoding, standalone value
  # and write flags.
  def clone
    XMLDecl.new(self)
  end

  # Writes the declaration to +writer+. Nothing is written (and nil is
  # returned) when writing is suppressed, unless +writer+ is an Output, in
  # which case the Output's encoding is the one reported.
  #
  # indent::
  #   Ignored. There must be no whitespace before an XML declaration
  # transitive::
  #   Ignored
  # ie_hack::
  #   Ignored
  def write(writer, indent=-1, transitive=false, ie_hack=false)
    return nil unless @writethis or writer.kind_of? Output
    writer << START.sub(/\\/u, '')
    if writer.kind_of? Output
      writer << " #{content writer.encoding}"
    else
      writer << " #{content encoding}"
    end
    writer << STOP.sub(/\\/u, '')
  end

  # Two declarations are equal when version, encoding and standalone agree;
  # the write flags are not compared.
  def ==( other )
    other.kind_of?(XMLDecl) and
      other.version == @version and
      other.encoding == self.encoding and
      other.standalone == @standalone
  end

  # Replaces version, encoding and standalone in one call.
  def xmldecl version, encoding, standalone
    @version = version
    self.encoding = encoding
    @standalone = standalone
  end

  def node_type
    :xmldecl
  end

  alias :stand_alone? :standalone
  alias :old_enc= :encoding=

  # Sets the encoding. nil selects "UTF-8" and stops the encoding attribute
  # from being written; any other value is written. Either way the
  # declaration itself is marked to be written.
  def encoding=( enc )
    if enc.nil?
      self.old_enc = "UTF-8"
      @writeencoding = false
    else
      self.old_enc = enc
      @writeencoding = true
    end
    self.dowrite
  end

  # Only use this if you do not want the XML declaration to be written;
  # this object is ignored by the XML writer. Otherwise, instantiate your
  # own XMLDecl and add it to the document.
  #
  # Note that XML 1.1 documents *must* include an XML declaration
  def XMLDecl.default
    rv = XMLDecl.new( "1.0" )
    rv.nowrite
    rv
  end

  # Suppresses output of this declaration.
  def nowrite
    @writethis = false
  end

  # Re-enables output of this declaration.
  def dowrite
    @writethis = true
  end

  def inspect
    START.sub(/\\/u, '') + " ... " + STOP.sub(/\\/u, '')
  end

  private

  # Builds the pseudo-attribute list. The encoding is included when it was
  # set explicitly or when +enc+ is anything other than UTF-8.
  def content(enc)
    rv = "version='#@version'"
    rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i
    rv << " standalone='#@standalone'" if @standalone
    rv
  end
end
end

View file

@ -0,0 +1,18 @@
module REXML
# Defines a number of tokens used for parsing XML. Not for general
# consumption.
#
# Every constant is a String holding regular-expression source, meant to be
# interpolated into larger patterns rather than used on its own.
module XMLTokens
# A name segment: one word character or ':' followed by any number of
# '-', word characters, digits or '.'.
NCNAME_STR= '[\w:][\-\w\d.]*'
# A name segment with an optional "segment:" prefix (non-capturing).
NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"
# A single character allowed after the first character of a name.
NAMECHAR = '[\-\w\d\.:]'
# A complete name; note that this one is a capturing group.
NAME = "([\\w:]#{NAMECHAR}*)"
# One or more name characters.
NMTOKEN = "(?:#{NAMECHAR})+"
# Whitespace-separated NMTOKENs; the repetition is a capturing group.
NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
# An entity reference (&name;) or a decimal / hexadecimal character
# reference (&#NNN; / &#xHHH;).
REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"
#REFERENCE = "(?:#{ENTITYREF}|#{CHARREF})"
#ENTITYREF = "&#{NAME};"
#CHARREF = "&#\\d+;|&#x[0-9a-fA-F]+;"
end
end

66
vendor/plugins/rexml/lib/rexml/xpath.rb vendored Normal file
View file

@ -0,0 +1,66 @@
require 'rexml/functions'
require 'rexml/xpath_parser'
module REXML
# Wrapper class. Use this class to access the XPath functions.
class XPath
  include Functions
  EMPTY_HASH = {}

  # Finds and returns the first node that matches the supplied xpath.
  # element::
  #   The context element (or an Array of context nodes)
  # path::
  #   The xpath to search for. If not supplied or nil, returns the first
  #   node matching '*'.
  # namespaces::
  #   If supplied, a Hash which defines a namespace mapping.
  # variables::
  #   A Hash of XPath variable bindings.
  #
  # Raises a RuntimeError if +namespaces+ is neither nil nor a Hash, or if
  # +variables+ is not a Hash.
  #
  #  XPath.first( node )
  #  XPath.first( doc, "//b" )
  #  XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
  def self.first( element, path=nil, namespaces=nil, variables={} )
    unless namespaces.nil? or namespaces.kind_of?(Hash)
      raise "The namespaces argument, if supplied, must be a hash object."
    end
    unless variables.kind_of?(Hash)
      raise "The variables argument, if supplied, must be a hash object."
    end
    parser = XPathParser.new
    parser.namespaces = namespaces
    parser.variables = variables
    path ||= "*"
    context_nodes = element.kind_of?( Array ) ? element : [element]
    parser.parse( path, context_nodes ).flatten[0]
  end

  # Iterates over the nodes that match the given path, calling the supplied
  # block with each match.
  # element::
  #   The context element (or an Array of context nodes)
  # path::
  #   The xpath to search for. If not supplied or nil, defaults to '*'
  # namespaces::
  #   If supplied, a Hash which defines a namespace mapping
  # variables::
  #   A Hash of XPath variable bindings.
  #
  # Raises under the same conditions as XPath.first.
  #
  #  XPath.each( node ) { |el| ... }
  #  XPath.each( node, "/*[@attr='v']" ) { |el| ... }
  #  XPath.each( node, 'ancestor::x' ) { |el| ... }
  def self.each( element, path=nil, namespaces=nil, variables={}, &block )
    unless namespaces.nil? or namespaces.kind_of?(Hash)
      raise "The namespaces argument, if supplied, must be a hash object."
    end
    unless variables.kind_of?(Hash)
      raise "The variables argument, if supplied, must be a hash object."
    end
    parser = XPathParser.new
    parser.namespaces = namespaces
    parser.variables = variables
    path ||= "*"
    context_nodes = element.kind_of?( Array ) ? element : [element]
    parser.parse( path, context_nodes ).each( &block )
  end

  # Returns an array of nodes matching a given XPath. Takes the same
  # arguments as XPath.first but performs no argument checking.
  def self.match( element, path=nil, namespaces=nil, variables={} )
    parser = XPathParser.new
    parser.namespaces = namespaces
    parser.variables = variables
    path ||= "*"
    context_nodes = element.kind_of?( Array ) ? element : [element]
    parser.parse( path, context_nodes )
  end
end
end

View file

@ -0,0 +1,792 @@
require 'rexml/namespace'
require 'rexml/xmltokens'
require 'rexml/attribute'
require 'rexml/syncenumerator'
require 'rexml/parsers/xpathparser'
class Object
  # Default "deep clone": an ordinary (shallow) clone. Container classes
  # override this to copy their contents as well.
  def dclone
    self.clone
  end
end
class Symbol
  # Symbols are immutable; the "clone" is the symbol itself.
  def dclone
    self
  end
end
class Fixnum
  # Small integers are immediate values; the "clone" is the number itself.
  def dclone
    self
  end
end
class Float
  # Floats are immutable; the "clone" is the number itself.
  def dclone
    self
  end
end
class Array
  # Deep clone: starts from an emptied clone of this array and fills it
  # with the dclone of every element.
  def dclone
    copy = clone.clear
    each { |member| copy << member.dclone }
    copy
  end
end
module REXML
# You don't want to use this class. Really. Use XPath, which is a wrapper
# for this class. Believe me. You don't want to poke around in here.
# There is strange, dark magic at work in this code. Beware. Go back! Go
# back while you still can!
class XPathParser
include XMLTokens
LITERAL = /^'([^']*)'|^"([^"]*)"/u
# Creates the underlying expression parser and starts with no namespace
# mapping and no variable bindings.
def initialize
  @parser = REXML::Parsers::XPathParser.new
  @namespaces = nil
  @variables = {}
end
# Installs the prefix => URI mapping used for lookups, both on this parser
# and as the namespace context of Functions.
def namespaces=( namespaces={} )
  @namespaces = ( Functions::namespace_context = namespaces )
end
# Installs the XPath variable bindings, both on this parser and on
# Functions.
def variables=( vars={} )
  @variables = ( Functions::variables = vars )
end
# Parses the XPath string +path+ and evaluates it against +nodeset+.
# Returns whatever match returns for the parsed expression.
def parse( path, nodeset )
  match( @parser.parse( path ), nodeset )
end
# Parses the XPath string +path+ and hands the result to first, which
# looks for a single match starting from +nodeset+.
def get_first( path, nodeset )
  first( @parser.parse( path ), nodeset )
end
# Parses +path+ and evaluates it directly with expr against +nodeset+.
def predicate( path, nodeset )
  expr( @parser.parse( path ), nodeset )
end
# Binds a single XPath variable: parser[ name ] = value.
def []=( variable_name, value )
  @variables.store( variable_name, value )
end
# Performs a depth-first (document order) XPath search, and returns the
# first match. This is the fastest, lightest way to return a single result.
#
# path_stack::
#   A parsed path: a flat Array of step symbols. A :qname step occupies
#   three slots (:qname, prefix, name); the prefix is not consulted here.
# node::
#   The node to start from; it must respond to +children+ and +name+ for
#   the steps that need them.
#
# Returns the matching node, or nil when nothing matches or the path is
# empty.
#
# FIXME: This method is incomplete! Only :document, :child, :qname,
# :descendant_or_self, :node and :any steps are understood; every other
# step yields nil.
def first( path_stack, node )
  # The body used to read an undefined local named +path+, so every call
  # raised NameError; it now uses the +path_stack+ parameter throughout.
  return nil if path_stack.size == 0
  case path_stack[0]
  when :document, :node, :any
    # These steps do not filter anything here; just move on.
    return first( path_stack[1..-1], node )
  when :child
    node.children.each do |c|
      r = first( path_stack[1..-1], c )
      return r if r
    end
  when :qname
    name = path_stack[2]
    return nil unless node.name == name
    # A :qname in the last three slots means the path is fully matched.
    return node if path_stack.size == 3
    return first( path_stack[3..-1], node )
  when :descendant_or_self
    # Try the rest of the path on this node, then retry the whole path
    # (including this step) on every child.
    r = first( path_stack[1..-1], node )
    return r if r
    node.children.each do |c|
      r = first( path_stack, c )
      return r if r
    end
  end
  return nil
end
# Evaluates an already parsed path against +nodeset+ and returns the
# result of expr unchanged.
def match( path_stack, nodeset )
  expr( path_stack, nodeset )
end
private
# Returns a String namespace for a node, given a prefix
# The rules are:
#
#  1. If a namespace mapping was supplied, only it is consulted; a prefix
#     it does not define resolves to ''.
#  2. Otherwise an element node resolves the prefix itself, and any other
#     kind of node resolves to ''.
def get_namespace( node, prefix )
  return @namespaces[prefix] || '' if @namespaces
  node.node_type == :element ? node.namespace( prefix ) : ''
end
# Expr takes a stack of path elements and a set of nodes (either a Parent
# or an Array) and returns an Array of matching nodes.
#
# path_stack::
#   The parsed expression. It is consumed destructively (entries are
#   shifted off), so callers that need it again pass a dclone.
# nodeset::
#   The nodes to evaluate against. Filtering steps modify this Array in
#   place.
# context::
#   Optional Hash (:node, :index, :size) describing the node a predicate or
#   function is being evaluated for.
#
# Most steps narrow or replace the nodeset and fall through to the next
# step. Literals, variables, operators and functions return their value
# immediately, so the result is not always an Array of nodes.
ALL = [ :attribute, :element, :text, :processing_instruction, :comment ]
ELEMENTS = [ :element ]
def expr( path_stack, nodeset, context=nil )
  # Node types that a following :any ('*') step is allowed to keep.
  node_types = ELEMENTS
  return nodeset if path_stack.length == 0 || nodeset.length == 0
  while path_stack.length > 0
    if nodeset.length == 0
      path_stack.clear
      return []
    end
    case (op = path_stack.shift)
    when :document
      nodeset = [ nodeset[0].root_node ]

    when :qname
      prefix = path_stack.shift
      name = path_stack.shift
      nodeset.delete_if do |node|
        # FIXME: This DOUBLES the time XPath searches take
        ns = get_namespace( node, prefix )
        !(node.node_type == :element and
          node.name == name and
          node.namespace == ns )
      end
      node_types = ELEMENTS

    when :any
      nodeset.delete_if { |node| !node_types.include?(node.node_type) }

    when :self
      # This space left intentionally blank

    when :processing_instruction
      target = path_stack.shift
      nodeset.delete_if do |node|
        (node.node_type != :processing_instruction) or
          ( target!='' and ( node.target != target ) )
      end

    when :text
      nodeset.delete_if { |node| node.node_type != :text }

    when :comment
      nodeset.delete_if { |node| node.node_type != :comment }

    when :node
      # This space left intentionally blank
      node_types = ALL

    when :child
      new_nodeset = []
      nt = nil
      nodeset.each do |node|
        nt = node.node_type
        new_nodeset += node.children if nt == :element or nt == :document
      end
      nodeset = new_nodeset
      node_types = ELEMENTS

    when :literal
      return path_stack.shift

    when :attribute
      new_nodeset = []
      case path_stack.shift
      when :qname
        prefix = path_stack.shift
        name = path_stack.shift
        for element in nodeset
          if element.node_type == :element
            attrib = element.attribute( name, get_namespace(element, prefix) )
            new_nodeset << attrib if attrib
          end
        end
      when :any
        for element in nodeset
          if element.node_type == :element
            new_nodeset += element.attributes.to_a
          end
        end
      end
      nodeset = new_nodeset

    when :parent
      nodeset = nodeset.collect{|n| n.parent}.compact
      node_types = ELEMENTS

    when :ancestor
      new_nodeset = []
      nodeset.each do |node|
        while node.parent
          node = node.parent
          new_nodeset << node unless new_nodeset.include? node
        end
      end
      nodeset = new_nodeset
      node_types = ELEMENTS

    when :ancestor_or_self
      new_nodeset = []
      nodeset.each do |node|
        if node.node_type == :element
          new_nodeset << node
          while ( node.parent )
            node = node.parent
            new_nodeset << node unless new_nodeset.include? node
          end
        end
      end
      nodeset = new_nodeset
      node_types = ELEMENTS

    when :predicate
      new_nodeset = []
      subcontext = { :size => nodeset.size }
      pred = path_stack.shift
      nodeset.each_with_index { |node, index|
        subcontext[ :node ] = node
        subcontext[ :index ] = index+1
        # The predicate is evaluated once per node on a private copy,
        # because evaluation consumes the stack.
        pc = pred.dclone
        result = expr( pc, [node], subcontext )
        result = result[0] if result.kind_of? Array and result.length == 1
        if result.kind_of? Numeric
          # A numeric predicate selects by (1-based) position.
          new_nodeset << node if result == (index+1)
        elsif result.instance_of? Array
          # Keep the node if any entry of the result is truthy.
          if result.size > 0 and result.inject(false) {|k,s| s or k}
            new_nodeset << node if result.size > 0
          end
        else
          new_nodeset << node if result
        end
      }
      nodeset = new_nodeset

    when :descendant_or_self
      # The helper evaluates the remainder of the path itself.
      rv = descendant_or_self( path_stack, nodeset )
      path_stack.clear
      nodeset = rv
      node_types = ELEMENTS

    when :descendant
      results = []
      nt = nil
      nodeset.each do |node|
        nt = node.node_type
        results += expr( path_stack.dclone.unshift( :descendant_or_self ),
          node.children ) if nt == :element or nt == :document
      end
      nodeset = results
      node_types = ELEMENTS

    when :following_sibling
      results = []
      nodeset.each do |node|
        next if node.parent.nil?
        all_siblings = node.parent.children
        current_index = all_siblings.index( node )
        following_siblings = all_siblings[ current_index+1 .. -1 ]
        results += expr( path_stack.dclone, following_siblings )
      end
      nodeset = results

    when :preceding_sibling
      results = []
      nodeset.each do |node|
        next if node.parent.nil?
        all_siblings = node.parent.children
        current_index = all_siblings.index( node )
        preceding_siblings = all_siblings[ 0, current_index ].reverse
        results += preceding_siblings
      end
      nodeset = results
      node_types = ELEMENTS

    when :preceding
      new_nodeset = []
      nodeset.each do |node|
        new_nodeset += preceding( node )
      end
      nodeset = new_nodeset
      node_types = ELEMENTS

    when :following
      new_nodeset = []
      nodeset.each do |node|
        new_nodeset += following( node )
      end
      nodeset = new_nodeset
      node_types = ELEMENTS

    when :namespace
      new_nodeset = []
      prefix = path_stack.shift
      nodeset.each do |node|
        if (node.node_type == :element or node.node_type == :attribute)
          if @namespaces
            namespaces = @namespaces
          elsif (node.node_type == :element)
            namespaces = node.namespaces
          else
            # Attributes take the declarations of their owning element.
            # (This used to call the misspelled "namesapces" and raised
            # NoMethodError.)
            namespaces = node.element.namespaces
          end
          if (node.namespace == namespaces[prefix])
            new_nodeset << node
          end
        end
      end
      nodeset = new_nodeset

    when :variable
      var_name = path_stack.shift
      return @variables[ var_name ]

    # :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq
    # TODO: Special case for :or and :and -- not evaluate the right
    # operand if the left alone determines result (i.e. is true for
    # :or and false for :and).
    # (A second, separate "when :and" branch used to follow this one; it
    # could never be reached and has been removed.)
    when :eq, :neq, :lt, :lteq, :gt, :gteq, :and, :or
      left = expr( path_stack.shift, nodeset.dup, context )
      right = expr( path_stack.shift, nodeset.dup, context )
      res = equality_relational_compare( left, op, right )
      return res

    when :div
      left = Functions::number(expr(path_stack.shift, nodeset, context)).to_f
      right = Functions::number(expr(path_stack.shift, nodeset, context)).to_f
      return (left / right)

    when :mod
      left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
      right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
      return (left % right)

    when :mult
      left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
      right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
      return (left * right)

    when :plus
      left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
      right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
      return (left + right)

    when :minus
      left = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
      right = Functions::number(expr(path_stack.shift, nodeset, context )).to_f
      return (left - right)

    when :union
      left = expr( path_stack.shift, nodeset, context )
      right = expr( path_stack.shift, nodeset, context )
      return (left | right)

    when :neg
      res = expr( path_stack, nodeset, context )
      return -(res.to_f)

    when :not
      # No action; evaluation continues with the next step.

    when :function
      func_name = path_stack.shift.tr('-','_')
      arguments = path_stack.shift
      # Without an inherited context, build one per node from the nodeset.
      subcontext = context ? nil : { :size => nodeset.size }
      res = []
      cont = context
      nodeset.each_with_index { |n, i|
        if subcontext
          subcontext[:node] = n
          subcontext[:index] = i
          cont = subcontext
        end
        arg_clone = arguments.dclone
        args = arg_clone.collect { |arg|
          expr( arg, [n], cont )
        }
        Functions.context = cont
        res << Functions.send( func_name, *args )
      }
      return res
    end
  end # while
  return nodeset
end
##########################################################
# FIXME
# The next two methods are BAD MOJO!
# This is my achilles heel.  If anybody thinks of a better
# way of doing this, be my guest.  This really sucks, but
# it is a wonder it works at all.
# ########################################################
#
# Evaluates +path_stack+ against every node in +nodeset+ and all of their
# descendants (via d_o_s), then flattens the matches, drops nils and puts
# them in document order.
def descendant_or_self( path_stack, nodeset )
  matches = []
  d_o_s( path_stack, nodeset, matches )
  document_order( matches.flatten.compact )
end
# Recursive worker for descendant_or_self.
#
# p::  the path stack (a fresh dclone is evaluated for every node)
# ns:: the nodes to visit
# r::  accumulator; matches are appended to it
#
# For each node, the node's own matches come first, followed by the matches
# found among its descendants.
def d_o_s( p, ns, r )
  ns.each do |n|
    found = expr( p.dclone, [ n ] )
    kind = n.node_type
    if (kind == :element || kind == :document) && n.children.size > 0
      # Descendant matches are collected into this node's own result list.
      d_o_s( p, n.children, found )
    end
    r.concat( found ) if found.size > 0
  end
end
# Reorders an array of nodes so that they are in document order
# It tries to do this efficiently.
#
# Each node is keyed by the list of child indexes leading to it, starting
# from its topmost element ancestor. An attribute is keyed by its owning
# element. The keys are then compared as Arrays.
#
# FIXME: I need to get rid of this, but the issue is that most of the XPath
# interpreter functions as a filter, which means that we lose context going
# in and out of function calls.  If I knew what the index of the nodes was,
# I wouldn't have to do this.  Maybe add a document IDX for each node?
# Problems with mutable documents.  Or, rewrite everything.
def document_order( array_of_nodes )
  keyed = array_of_nodes.map do |node|
    cursor = node.node_type == :attribute ? node.element : node
    position = []
    while cursor.parent && cursor.parent.node_type == :element
      # Climbing upwards, so each index goes in front of the ones below it.
      position.unshift( cursor.parent.index( cursor ) )
      cursor = cursor.parent
    end
    [ position, node ]
  end
  keyed.sort { |x, y| x[0] <=> y[0] }.map { |pair| pair[1] }
end
# Yields every node of +nodeset+ and, for element nodes, recursively every
# node they enumerate, parents before children.
def recurse( nodeset, &block )
  nodeset.each do |node|
    yield node
    next unless node.node_type == :element
    recurse( node, &block )
  end
end
# Builds a nodeset of all of the preceding nodes of the supplied node,
# in reverse document order
# preceding:: includes every element in the document that precedes this node,
# except for ancestors
def preceding( node )
  # Collect the ancestor chain first so ancestors can be skipped below.
  ancestors = []
  cursor = node
  ancestors << cursor while (cursor = cursor.parent)

  found = []
  cursor = preceding_node_of( node )
  while cursor
    if ancestors.include?( cursor )
      # Each ancestor is met once; drop it to keep later lookups short.
      ancestors.delete( cursor )
    else
      found << cursor
    end
    cursor = preceding_node_of( cursor )
  end
  found
end
# Returns the node that immediately precedes +node+ in document order, or
# nil when there is none.
#
# Without a previous sibling that is the parent (nil when there is no
# parent or the parent is the Document). With a previous sibling it is
# that sibling's last, deepest descendant.
def preceding_node_of( node )
  candidate = node.previous_sibling_node
  if candidate.nil?
    owner = node.parent
    return nil if owner.nil? or owner.class == Document
    return owner
  end
  # Walk down the last-child chain of the previous sibling.
  while candidate and candidate.kind_of?( Element ) and candidate.children.size > 0
    candidate = candidate.children[-1]
  end
  candidate
end
# Builds the list of nodes that follow +node+ in document order: starting
# at the next sibling (or the nearest ancestor's next sibling) and walking
# forward with following_node_of until the document ends.
def following( node )
  found = []
  cursor = next_sibling_node( node )
  until !cursor
    found << cursor
    cursor = following_node_of( cursor )
  end
  found
end
# Returns the node that comes right after +node+ in document order: the
# first child of an Element that has children, otherwise whatever
# next_sibling_node finds.
def following_node_of( node )
  has_children = node.kind_of?( Element ) && node.children.size > 0
  has_children ? node.children[0] : next_sibling_node( node )
end
# Returns the next sibling of +node+. When it has none, climbs to the
# parent and tries again, giving up with nil once a node has no parent or
# its parent is the Document.
def next_sibling_node(node)
  loop do
    sibling = node.next_sibling_node
    return sibling unless sibling.nil?
    owner = node.parent
    return nil if owner.nil? or owner.class == Document
    node = owner
  end
end
# Normalises a value before comparison: booleans are kept, the strings
# 'true' / 'false' go through Functions::boolean, strings made of digits
# with an optional fraction go through Functions::number, and everything
# else goes through Functions::string.
def norm( b )
  case b
  when true, false      then b
  when 'true', 'false'  then Functions::boolean( b )
  when /^\d+(\.\d+)?$/  then Functions::number( b )
  else                       Functions::string( b )
  end
end
# Applies the comparison or logical operator +op+ (:eq, :neq, :lt, :lteq,
# :gt, :gteq, :and, :or) to two evaluated operands, each of which may be an
# Array (a nodeset / list of values) or a single value.
#
# Returns an Array of per-item results whenever at least one operand is
# still an Array after the single-element unwrapping below; otherwise
# returns the result of a single compare call.
def equality_relational_compare( set1, op, set2 )
#puts "EQ_REL_COMP(#{set1.inspect} #{op.inspect} #{set2.inspect})"
if set1.kind_of? Array and set2.kind_of? Array
#puts "#{set1.size} & #{set2.size}"
if set1.size == 1 and set2.size == 1
# Two single-item lists: unwrap both and continue with the scalar rules
# further down.
set1 = set1[0]
set2 = set2[0]
elsif set1.size == 0 or set2.size == 0
# One side is empty: every item of the other side is compared to nil.
nd = set1.size==0 ? set2 : set1
rv = nd.collect { |il| compare( il, op, nil ) }
#puts "RV = #{rv.inspect}"
return rv
else
# Otherwise items are normalised with norm and compared pairwise, in
# step (not every item against every item).
res = []
enum = SyncEnumerator.new( set1, set2 ).each { |i1, i2|
#puts "i1 = #{i1.inspect} (#{i1.class.name})"
#puts "i2 = #{i2.inspect} (#{i2.class.name})"
i1 = norm( i1 )
i2 = norm( i2 )
res << compare( i1, op, i2 )
}
return res
end
end
#puts "EQ_REL_COMP: #{set1.inspect} (#{set1.class.name}), #{op}, #{set2.inspect} (#{set2.class.name})"
#puts "COMPARING VALUES"
# If one is nodeset and other is number, compare number to each item
# in nodeset s.t. number op number(string(item))
# If one is nodeset and other is string, compare string to each item
# in nodeset s.t. string op string(item)
# If one is nodeset and other is boolean, compare boolean to each item
# in nodeset s.t. boolean op boolean(item)
if set1.kind_of? Array or set2.kind_of? Array
#puts "ISA ARRAY"
# Whichever operand is the Array becomes +a+, so the Array item is always
# the left-hand argument of compare, even if it was the right operand.
if set1.kind_of? Array
a = set1
b = set2
else
a = set2
b = set1
end
case b
when true, false
return a.collect {|v| compare( Functions::boolean(v), op, b ) }
when Numeric
return a.collect {|v| compare( Functions::number(v), op, b )}
when /^\d+(\.\d+)?$/
# A string that looks like a number is compared numerically.
b = Functions::number( b )
#puts "B = #{b.inspect}"
return a.collect {|v| compare( Functions::number(v), op, b )}
else
#puts "Functions::string( #{b}(#{b.class.name}) ) = #{Functions::string(b)}"
b = Functions::string( b )
return a.collect { |v| compare( Functions::string(v), op, b ) }
end
else
# If neither is nodeset,
# If op is = or !=
# If either boolean, convert to boolean
# If either number, convert to number
# Else, convert to string
# Else
# Convert both to numbers and compare
s1 = set1.to_s
s2 = set2.to_s
#puts "EQ_REL_COMP: #{set1}=>#{s1}, #{set2}=>#{s2}"
# The boolean test below is applied for every operator, not only for
# = and != as the outline above suggests.
if s1 == 'true' or s1 == 'false' or s2 == 'true' or s2 == 'false'
#puts "Functions::boolean(#{set1})=>#{Functions::boolean(set1)}"
#puts "Functions::boolean(#{set2})=>#{Functions::boolean(set2)}"
set1 = Functions::boolean( set1 )
set2 = Functions::boolean( set2 )
else
if op == :eq or op == :neq
if s1 =~ /^\d+(\.\d+)?$/ or s2 =~ /^\d+(\.\d+)?$/
set1 = Functions::number( s1 )
set2 = Functions::number( s2 )
else
set1 = Functions::string( set1 )
set2 = Functions::string( set2 )
end
else
set1 = Functions::number( set1 )
set2 = Functions::number( set2 )
end
end
#puts "EQ_REL_COMP: #{set1} #{op} #{set2}"
#puts ">>> #{compare( set1, op, set2 )}"
return compare( set1, op, set2 )
end
# Not reached: both branches above return.
return false
end
# Applies a single operator to two already converted values.
#
# :and and :or return the deciding operand itself rather than a strict
# boolean; an unknown operator yields false.
def compare( a, op, b )
  case op
  when :eq   then a == b
  when :neq  then a != b
  when :lt   then a < b
  when :lteq then a <= b
  when :gt   then a > b
  when :gteq then a >= b
  when :and  then (a && b)
  when :or   then (a || b)
  else false
  end
end
end
end