38ae064b8a
Sam Ruby has been doing a bang-up job fixing the bugs in REXML. Who knows when these improvements will trickle down to vendor distributions of Ruby. In the meantime, let's bundle the latest version of REXML with Instiki. We check the version number of the bundled REXML against that of the System REXML, and use whichever is later.
698 lines
19 KiB
Ruby
698 lines
19 KiB
Ruby
require 'rexml/namespace'
|
|
require 'rexml/xmltokens'
|
|
|
|
module REXML
|
|
module Parsers
|
|
# You don't want to use this class. Really. Use XPath, which is a wrapper
|
|
# for this class. Believe me. You don't want to poke around in here.
|
|
# There is strange, dark magic at work in this code. Beware. Go back! Go
|
|
# back while you still can!
|
|
class XPathParser
|
|
include XMLTokens
|
|
LITERAL = /^'([^']*)'|^"([^"]*)"/u
|
|
|
|
def namespaces=( namespaces )
|
|
Functions::namespace_context = namespaces
|
|
@namespaces = namespaces
|
|
end
|
|
|
|
def parse path
|
|
path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces
|
|
path.gsub!( /\s+([\]\)])/, '\1' )
|
|
parsed = []
|
|
path = OrExpr(path, parsed)
|
|
parsed
|
|
end
|
|
|
|
def predicate path
|
|
parsed = []
|
|
Predicate( "[#{path}]", parsed )
|
|
parsed
|
|
end
|
|
|
|
def abbreviate( path )
|
|
path = path.kind_of?(String) ? parse( path ) : path
|
|
string = ""
|
|
document = false
|
|
while path.size > 0
|
|
op = path.shift
|
|
case op
|
|
when :node
|
|
when :attribute
|
|
string << "/" if string.size > 0
|
|
string << "@"
|
|
when :child
|
|
string << "/" if string.size > 0
|
|
when :descendant_or_self
|
|
string << "/"
|
|
when :self
|
|
string << "."
|
|
when :parent
|
|
string << ".."
|
|
when :any
|
|
string << "*"
|
|
when :text
|
|
string << "text()"
|
|
when :following, :following_sibling,
|
|
:ancestor, :ancestor_or_self, :descendant,
|
|
:namespace, :preceding, :preceding_sibling
|
|
string << "/" unless string.size == 0
|
|
string << op.to_s.tr("_", "-")
|
|
string << "::"
|
|
when :qname
|
|
prefix = path.shift
|
|
name = path.shift
|
|
string << prefix+":" if prefix.size > 0
|
|
string << name
|
|
when :predicate
|
|
string << '['
|
|
string << predicate_to_string( path.shift ) {|x| abbreviate( x ) }
|
|
string << ']'
|
|
when :document
|
|
document = true
|
|
when :function
|
|
string << path.shift
|
|
string << "( "
|
|
string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
|
|
string << " )"
|
|
when :literal
|
|
string << %Q{ "#{path.shift}" }
|
|
else
|
|
string << "/" unless string.size == 0
|
|
string << "UNKNOWN("
|
|
string << op.inspect
|
|
string << ")"
|
|
end
|
|
end
|
|
string = "/"+string if document
|
|
return string
|
|
end
|
|
|
|
def expand( path )
|
|
path = path.kind_of?(String) ? parse( path ) : path
|
|
string = ""
|
|
document = false
|
|
while path.size > 0
|
|
op = path.shift
|
|
case op
|
|
when :node
|
|
string << "node()"
|
|
when :attribute, :child, :following, :following_sibling,
|
|
:ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
|
|
:namespace, :preceding, :preceding_sibling, :self, :parent
|
|
string << "/" unless string.size == 0
|
|
string << op.to_s.tr("_", "-")
|
|
string << "::"
|
|
when :any
|
|
string << "*"
|
|
when :qname
|
|
prefix = path.shift
|
|
name = path.shift
|
|
string << prefix+":" if prefix.size > 0
|
|
string << name
|
|
when :predicate
|
|
string << '['
|
|
string << predicate_to_string( path.shift ) { |x| expand(x) }
|
|
string << ']'
|
|
when :document
|
|
document = true
|
|
else
|
|
string << "/" unless string.size == 0
|
|
string << "UNKNOWN("
|
|
string << op.inspect
|
|
string << ")"
|
|
end
|
|
end
|
|
string = "/"+string if document
|
|
return string
|
|
end
|
|
|
|
def predicate_to_string( path, &block )
|
|
string = ""
|
|
case path[0]
|
|
when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
|
|
op = path.shift
|
|
case op
|
|
when :eq
|
|
op = "="
|
|
when :lt
|
|
op = "<"
|
|
when :gt
|
|
op = ">"
|
|
when :lteq
|
|
op = "<="
|
|
when :gteq
|
|
op = ">="
|
|
when :neq
|
|
op = "!="
|
|
when :union
|
|
op = "|"
|
|
end
|
|
left = predicate_to_string( path.shift, &block )
|
|
right = predicate_to_string( path.shift, &block )
|
|
string << " "
|
|
string << left
|
|
string << " "
|
|
string << op.to_s
|
|
string << " "
|
|
string << right
|
|
string << " "
|
|
when :function
|
|
path.shift
|
|
name = path.shift
|
|
string << name
|
|
string << "( "
|
|
string << predicate_to_string( path.shift, &block )
|
|
string << " )"
|
|
when :literal
|
|
path.shift
|
|
string << " "
|
|
string << path.shift.inspect
|
|
string << " "
|
|
else
|
|
string << " "
|
|
string << yield( path )
|
|
string << " "
|
|
end
|
|
return string.squeeze(" ")
|
|
end
|
|
|
|
private
|
|
#LocationPath
|
|
# | RelativeLocationPath
|
|
# | '/' RelativeLocationPath?
|
|
# | '//' RelativeLocationPath
|
|
def LocationPath path, parsed
|
|
#puts "LocationPath '#{path}'"
|
|
path = path.strip
|
|
if path[0] == ?/
|
|
parsed << :document
|
|
if path[1] == ?/
|
|
parsed << :descendant_or_self
|
|
parsed << :node
|
|
path = path[2..-1]
|
|
else
|
|
path = path[1..-1]
|
|
end
|
|
end
|
|
#puts parsed.inspect
|
|
return RelativeLocationPath( path, parsed ) if path.size > 0
|
|
end
|
|
|
|
#RelativeLocationPath
|
|
# | Step
|
|
# | (AXIS_NAME '::' | '@' | '') AxisSpecifier
|
|
# NodeTest
|
|
# Predicate
|
|
# | '.' | '..' AbbreviatedStep
|
|
# | RelativeLocationPath '/' Step
|
|
# | RelativeLocationPath '//' Step
|
|
AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
|
|
def RelativeLocationPath path, parsed
|
|
#puts "RelativeLocationPath #{path}"
|
|
while path.size > 0
|
|
# (axis or @ or <child::>) nodetest predicate >
|
|
# OR > / Step
|
|
# (. or ..) >
|
|
if path[0] == ?.
|
|
if path[1] == ?.
|
|
parsed << :parent
|
|
parsed << :node
|
|
path = path[2..-1]
|
|
else
|
|
parsed << :self
|
|
parsed << :node
|
|
path = path[1..-1]
|
|
end
|
|
else
|
|
if path[0] == ?@
|
|
#puts "ATTRIBUTE"
|
|
parsed << :attribute
|
|
path = path[1..-1]
|
|
# Goto Nodetest
|
|
elsif path =~ AXIS
|
|
parsed << $1.tr('-','_').intern
|
|
path = $'
|
|
# Goto Nodetest
|
|
else
|
|
parsed << :child
|
|
end
|
|
|
|
#puts "NODETESTING '#{path}'"
|
|
n = []
|
|
path = NodeTest( path, n)
|
|
#puts "NODETEST RETURNED '#{path}'"
|
|
|
|
if path[0] == ?[
|
|
path = Predicate( path, n )
|
|
end
|
|
|
|
parsed.concat(n)
|
|
end
|
|
|
|
if path.size > 0
|
|
if path[0] == ?/
|
|
if path[1] == ?/
|
|
parsed << :descendant_or_self
|
|
parsed << :node
|
|
path = path[2..-1]
|
|
else
|
|
path = path[1..-1]
|
|
end
|
|
else
|
|
return path
|
|
end
|
|
end
|
|
end
|
|
return path
|
|
end
|
|
|
|
# Returns a 1-1 map of the nodeset
|
|
# The contents of the resulting array are either:
|
|
# true/false, if a positive match
|
|
# String, if a name match
|
|
#NodeTest
|
|
# | ('*' | NCNAME ':' '*' | QNAME) NameTest
|
|
# | NODE_TYPE '(' ')' NodeType
|
|
# | PI '(' LITERAL ')' PI
|
|
# | '[' expr ']' Predicate
|
|
NCNAMETEST= /^(#{NCNAME_STR}):\*/u
|
|
QNAME = Namespace::NAMESPLIT
|
|
NODE_TYPE = /^(comment|text|node)\(\s*\)/m
|
|
PI = /^processing-instruction\(/
|
|
def NodeTest path, parsed
|
|
#puts "NodeTest with #{path}"
|
|
res = nil
|
|
case path
|
|
when /^\*/
|
|
path = $'
|
|
parsed << :any
|
|
when NODE_TYPE
|
|
type = $1
|
|
path = $'
|
|
parsed << type.tr('-', '_').intern
|
|
when PI
|
|
path = $'
|
|
literal = nil
|
|
if path !~ /^\s*\)/
|
|
path =~ LITERAL
|
|
literal = $1
|
|
path = $'
|
|
raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?)
|
|
path = path[1..-1]
|
|
end
|
|
parsed << :processing_instruction
|
|
parsed << (literal || '')
|
|
when NCNAMETEST
|
|
#puts "NCNAMETEST"
|
|
prefix = $1
|
|
path = $'
|
|
parsed << :namespace
|
|
parsed << prefix
|
|
when QNAME
|
|
#puts "QNAME"
|
|
prefix = $1
|
|
name = $2
|
|
path = $'
|
|
prefix = "" unless prefix
|
|
parsed << :qname
|
|
parsed << prefix
|
|
parsed << name
|
|
end
|
|
return path
|
|
end
|
|
|
|
# Filters the supplied nodeset on the predicate(s)
|
|
def Predicate path, parsed
|
|
#puts "PREDICATE with #{path}"
|
|
return nil unless path[0] == ?[
|
|
predicates = []
|
|
while path[0] == ?[
|
|
path, expr = get_group(path)
|
|
predicates << expr[1..-2] if expr
|
|
end
|
|
#puts "PREDICATES = #{predicates.inspect}"
|
|
predicates.each{ |pred|
|
|
#puts "ORING #{pred}"
|
|
preds = []
|
|
parsed << :predicate
|
|
parsed << preds
|
|
OrExpr(pred, preds)
|
|
}
|
|
#puts "PREDICATES = #{predicates.inspect}"
|
|
path
|
|
end
|
|
|
|
# The following return arrays of true/false, a 1-1 mapping of the
|
|
# supplied nodeset, except for axe(), which returns a filtered
|
|
# nodeset
|
|
|
|
#| OrExpr S 'or' S AndExpr
|
|
#| AndExpr
|
|
def OrExpr path, parsed
|
|
#puts "OR >>> #{path}"
|
|
n = []
|
|
rest = AndExpr( path, n )
|
|
#puts "OR <<< #{rest}"
|
|
if rest != path
|
|
while rest =~ /^\s*( or )/
|
|
n = [ :or, n, [] ]
|
|
rest = AndExpr( $', n[-1] )
|
|
end
|
|
end
|
|
if parsed.size == 0 and n.size != 0
|
|
parsed.replace(n)
|
|
elsif n.size > 0
|
|
parsed << n
|
|
end
|
|
rest
|
|
end
|
|
|
|
#| AndExpr S 'and' S EqualityExpr
|
|
#| EqualityExpr
|
|
def AndExpr path, parsed
|
|
#puts "AND >>> #{path}"
|
|
n = []
|
|
rest = EqualityExpr( path, n )
|
|
#puts "AND <<< #{rest}"
|
|
if rest != path
|
|
while rest =~ /^\s*( and )/
|
|
n = [ :and, n, [] ]
|
|
#puts "AND >>> #{rest}"
|
|
rest = EqualityExpr( $', n[-1] )
|
|
#puts "AND <<< #{rest}"
|
|
end
|
|
end
|
|
if parsed.size == 0 and n.size != 0
|
|
parsed.replace(n)
|
|
elsif n.size > 0
|
|
parsed << n
|
|
end
|
|
rest
|
|
end
|
|
|
|
#| EqualityExpr ('=' | '!=') RelationalExpr
|
|
#| RelationalExpr
|
|
def EqualityExpr path, parsed
|
|
#puts "EQUALITY >>> #{path}"
|
|
n = []
|
|
rest = RelationalExpr( path, n )
|
|
#puts "EQUALITY <<< #{rest}"
|
|
if rest != path
|
|
while rest =~ /^\s*(!?=)\s*/
|
|
if $1[0] == ?!
|
|
n = [ :neq, n, [] ]
|
|
else
|
|
n = [ :eq, n, [] ]
|
|
end
|
|
rest = RelationalExpr( $', n[-1] )
|
|
end
|
|
end
|
|
if parsed.size == 0 and n.size != 0
|
|
parsed.replace(n)
|
|
elsif n.size > 0
|
|
parsed << n
|
|
end
|
|
rest
|
|
end
|
|
|
|
#| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
|
|
#| AdditiveExpr
|
|
def RelationalExpr path, parsed
|
|
#puts "RELATION >>> #{path}"
|
|
n = []
|
|
rest = AdditiveExpr( path, n )
|
|
#puts "RELATION <<< #{rest}"
|
|
if rest != path
|
|
while rest =~ /^\s*([<>]=?)\s*/
|
|
if $1[0] == ?<
|
|
sym = "lt"
|
|
else
|
|
sym = "gt"
|
|
end
|
|
sym << "eq" if $1[-1] == ?=
|
|
n = [ sym.intern, n, [] ]
|
|
rest = AdditiveExpr( $', n[-1] )
|
|
end
|
|
end
|
|
if parsed.size == 0 and n.size != 0
|
|
parsed.replace(n)
|
|
elsif n.size > 0
|
|
parsed << n
|
|
end
|
|
rest
|
|
end
|
|
|
|
#| AdditiveExpr ('+' | S '-') MultiplicativeExpr
|
|
#| MultiplicativeExpr
|
|
def AdditiveExpr path, parsed
|
|
#puts "ADDITIVE >>> #{path}"
|
|
n = []
|
|
rest = MultiplicativeExpr( path, n )
|
|
#puts "ADDITIVE <<< #{rest}"
|
|
if rest != path
|
|
while rest =~ /^\s*(\+| -)\s*/
|
|
if $1[0] == ?+
|
|
n = [ :plus, n, [] ]
|
|
else
|
|
n = [ :minus, n, [] ]
|
|
end
|
|
rest = MultiplicativeExpr( $', n[-1] )
|
|
end
|
|
end
|
|
if parsed.size == 0 and n.size != 0
|
|
parsed.replace(n)
|
|
elsif n.size > 0
|
|
parsed << n
|
|
end
|
|
rest
|
|
end
|
|
|
|
#| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
|
|
#| UnaryExpr
|
|
def MultiplicativeExpr path, parsed
|
|
#puts "MULT >>> #{path}"
|
|
n = []
|
|
rest = UnaryExpr( path, n )
|
|
#puts "MULT <<< #{rest}"
|
|
if rest != path
|
|
while rest =~ /^\s*(\*| div | mod )\s*/
|
|
if $1[0] == ?*
|
|
n = [ :mult, n, [] ]
|
|
elsif $1.include?( "div" )
|
|
n = [ :div, n, [] ]
|
|
else
|
|
n = [ :mod, n, [] ]
|
|
end
|
|
rest = UnaryExpr( $', n[-1] )
|
|
end
|
|
end
|
|
if parsed.size == 0 and n.size != 0
|
|
parsed.replace(n)
|
|
elsif n.size > 0
|
|
parsed << n
|
|
end
|
|
rest
|
|
end
|
|
|
|
#| '-' UnaryExpr
|
|
#| UnionExpr
|
|
def UnaryExpr path, parsed
|
|
path =~ /^(\-*)/
|
|
path = $'
|
|
if $1 and (($1.size % 2) != 0)
|
|
mult = -1
|
|
else
|
|
mult = 1
|
|
end
|
|
parsed << :neg if mult < 0
|
|
|
|
#puts "UNARY >>> #{path}"
|
|
n = []
|
|
path = UnionExpr( path, n )
|
|
#puts "UNARY <<< #{path}"
|
|
parsed.concat( n )
|
|
path
|
|
end
|
|
|
|
#| UnionExpr '|' PathExpr
|
|
#| PathExpr
|
|
def UnionExpr path, parsed
|
|
#puts "UNION >>> #{path}"
|
|
n = []
|
|
rest = PathExpr( path, n )
|
|
#puts "UNION <<< #{rest}"
|
|
if rest != path
|
|
while rest =~ /^\s*(\|)\s*/
|
|
n = [ :union, n, [] ]
|
|
rest = PathExpr( $', n[-1] )
|
|
end
|
|
end
|
|
if parsed.size == 0 and n.size != 0
|
|
parsed.replace( n )
|
|
elsif n.size > 0
|
|
parsed << n
|
|
end
|
|
rest
|
|
end
|
|
|
|
#| LocationPath
|
|
#| FilterExpr ('/' | '//') RelativeLocationPath
|
|
def PathExpr path, parsed
|
|
path =~ /^\s*/
|
|
path = $'
|
|
#puts "PATH >>> #{path}"
|
|
n = []
|
|
rest = FilterExpr( path, n )
|
|
#puts "PATH <<< '#{rest}'"
|
|
if rest != path
|
|
if rest and rest[0] == ?/
|
|
return RelativeLocationPath(rest, n)
|
|
end
|
|
end
|
|
#puts "BEFORE WITH '#{rest}'"
|
|
rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w_*]/
|
|
parsed.concat(n)
|
|
return rest
|
|
end
|
|
|
|
#| FilterExpr Predicate
|
|
#| PrimaryExpr
|
|
def FilterExpr path, parsed
|
|
#puts "FILTER >>> #{path}"
|
|
n = []
|
|
path = PrimaryExpr( path, n )
|
|
#puts "FILTER <<< #{path}"
|
|
path = Predicate(path, n) if path and path[0] == ?[
|
|
#puts "FILTER <<< #{path}"
|
|
parsed.concat(n)
|
|
path
|
|
end
|
|
|
|
#| VARIABLE_REFERENCE
|
|
#| '(' expr ')'
|
|
#| LITERAL
|
|
#| NUMBER
|
|
#| FunctionCall
|
|
VARIABLE_REFERENCE = /^\$(#{NAME_STR})/u
|
|
NUMBER = /^(\d*\.?\d+)/
|
|
NT = /^comment|text|processing-instruction|node$/
|
|
def PrimaryExpr path, parsed
|
|
arry = []
|
|
case path
|
|
when VARIABLE_REFERENCE
|
|
varname = $1
|
|
path = $'
|
|
parsed << :variable
|
|
parsed << varname
|
|
#arry << @variables[ varname ]
|
|
when /^(\w[-\w]*)(?:\()/
|
|
#puts "PrimaryExpr :: Function >>> #$1 -- '#$''"
|
|
fname = $1
|
|
tmp = $'
|
|
#puts "#{fname} =~ #{NT.inspect}"
|
|
return path if fname =~ NT
|
|
path = tmp
|
|
parsed << :function
|
|
parsed << fname
|
|
path = FunctionCall(path, parsed)
|
|
when NUMBER
|
|
#puts "LITERAL or NUMBER: #$1"
|
|
varname = $1.nil? ? $2 : $1
|
|
path = $'
|
|
parsed << :literal
|
|
parsed << (varname.include?('.') ? varname.to_f : varname.to_i)
|
|
when LITERAL
|
|
#puts "LITERAL or NUMBER: #$1"
|
|
varname = $1.nil? ? $2 : $1
|
|
path = $'
|
|
parsed << :literal
|
|
parsed << varname
|
|
when /^\(/ #/
|
|
path, contents = get_group(path)
|
|
contents = contents[1..-2]
|
|
n = []
|
|
OrExpr( contents, n )
|
|
parsed.concat(n)
|
|
end
|
|
path
|
|
end
|
|
|
|
#| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
|
|
def FunctionCall rest, parsed
|
|
path, arguments = parse_args(rest)
|
|
argset = []
|
|
for argument in arguments
|
|
args = []
|
|
OrExpr( argument, args )
|
|
argset << args
|
|
end
|
|
parsed << argset
|
|
path
|
|
end
|
|
|
|
# get_group( '[foo]bar' ) -> ['bar', '[foo]']
|
|
def get_group string
|
|
ind = 0
|
|
depth = 0
|
|
st = string[0,1]
|
|
en = (st == "(" ? ")" : "]")
|
|
begin
|
|
case string[ind,1]
|
|
when st
|
|
depth += 1
|
|
when en
|
|
depth -= 1
|
|
end
|
|
ind += 1
|
|
end while depth > 0 and ind < string.length
|
|
return nil unless depth==0
|
|
[string[ind..-1], string[0..ind-1]]
|
|
end
|
|
|
|
def parse_args( string )
|
|
arguments = []
|
|
ind = 0
|
|
inquot = false
|
|
inapos = false
|
|
depth = 1
|
|
begin
|
|
case string[ind]
|
|
when ?"
|
|
inquot = !inquot unless inapos
|
|
when ?'
|
|
inapos = !inapos unless inquot
|
|
else
|
|
unless inquot or inapos
|
|
case string[ind]
|
|
when ?(
|
|
depth += 1
|
|
if depth == 1
|
|
string = string[1..-1]
|
|
ind -= 1
|
|
end
|
|
when ?)
|
|
depth -= 1
|
|
if depth == 0
|
|
s = string[0,ind].strip
|
|
arguments << s unless s == ""
|
|
string = string[ind+1..-1]
|
|
end
|
|
when ?,
|
|
if depth == 1
|
|
s = string[0,ind].strip
|
|
arguments << s unless s == ""
|
|
string = string[ind+1..-1]
|
|
ind = -1
|
|
end
|
|
end
|
|
end
|
|
end
|
|
ind += 1
|
|
end while depth > 0 and ind < string.length
|
|
return nil unless depth==0
|
|
[string,arguments]
|
|
end
|
|
end
|
|
end
|
|
end
|