instiki/vendor/plugins/rexml/lib/rexml/parsers/xpathparser.rb
Jacques Distler 38ae064b8a Bundle Latest REXML
Sam Ruby has been doing a bang-up job fixing the bugs in REXML.
Who knows when these improvements will trickle down to vendor distributions of Ruby.
In the meantime, let's bundle the latest version of REXML with Instiki.
We check the version number of the bundled REXML against that of the System REXML, and use whichever is later.
2008-01-11 23:53:29 -06:00

698 lines
19 KiB
Ruby

require 'rexml/namespace'
require 'rexml/xmltokens'
module REXML
module Parsers
# You don't want to use this class. Really. Use XPath, which is a wrapper
# for this class. Believe me. You don't want to poke around in here.
# There is strange, dark magic at work in this code. Beware. Go back! Go
# back while you still can!
class XPathParser
include XMLTokens
LITERAL = /^'([^']*)'|^"([^"]*)"/u
def namespaces=( namespaces )
Functions::namespace_context = namespaces
@namespaces = namespaces
end
def parse path
path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces
path.gsub!( /\s+([\]\)])/, '\1' )
parsed = []
path = OrExpr(path, parsed)
parsed
end
def predicate path
parsed = []
Predicate( "[#{path}]", parsed )
parsed
end
def abbreviate( path )
path = path.kind_of?(String) ? parse( path ) : path
string = ""
document = false
while path.size > 0
op = path.shift
case op
when :node
when :attribute
string << "/" if string.size > 0
string << "@"
when :child
string << "/" if string.size > 0
when :descendant_or_self
string << "/"
when :self
string << "."
when :parent
string << ".."
when :any
string << "*"
when :text
string << "text()"
when :following, :following_sibling,
:ancestor, :ancestor_or_self, :descendant,
:namespace, :preceding, :preceding_sibling
string << "/" unless string.size == 0
string << op.to_s.tr("_", "-")
string << "::"
when :qname
prefix = path.shift
name = path.shift
string << prefix+":" if prefix.size > 0
string << name
when :predicate
string << '['
string << predicate_to_string( path.shift ) {|x| abbreviate( x ) }
string << ']'
when :document
document = true
when :function
string << path.shift
string << "( "
string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
string << " )"
when :literal
string << %Q{ "#{path.shift}" }
else
string << "/" unless string.size == 0
string << "UNKNOWN("
string << op.inspect
string << ")"
end
end
string = "/"+string if document
return string
end
def expand( path )
path = path.kind_of?(String) ? parse( path ) : path
string = ""
document = false
while path.size > 0
op = path.shift
case op
when :node
string << "node()"
when :attribute, :child, :following, :following_sibling,
:ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
:namespace, :preceding, :preceding_sibling, :self, :parent
string << "/" unless string.size == 0
string << op.to_s.tr("_", "-")
string << "::"
when :any
string << "*"
when :qname
prefix = path.shift
name = path.shift
string << prefix+":" if prefix.size > 0
string << name
when :predicate
string << '['
string << predicate_to_string( path.shift ) { |x| expand(x) }
string << ']'
when :document
document = true
else
string << "/" unless string.size == 0
string << "UNKNOWN("
string << op.inspect
string << ")"
end
end
string = "/"+string if document
return string
end
def predicate_to_string( path, &block )
string = ""
case path[0]
when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
op = path.shift
case op
when :eq
op = "="
when :lt
op = "<"
when :gt
op = ">"
when :lteq
op = "<="
when :gteq
op = ">="
when :neq
op = "!="
when :union
op = "|"
end
left = predicate_to_string( path.shift, &block )
right = predicate_to_string( path.shift, &block )
string << " "
string << left
string << " "
string << op.to_s
string << " "
string << right
string << " "
when :function
path.shift
name = path.shift
string << name
string << "( "
string << predicate_to_string( path.shift, &block )
string << " )"
when :literal
path.shift
string << " "
string << path.shift.inspect
string << " "
else
string << " "
string << yield( path )
string << " "
end
return string.squeeze(" ")
end
private
#LocationPath
# | RelativeLocationPath
# | '/' RelativeLocationPath?
# | '//' RelativeLocationPath
def LocationPath path, parsed
#puts "LocationPath '#{path}'"
path = path.strip
if path[0] == ?/
parsed << :document
if path[1] == ?/
parsed << :descendant_or_self
parsed << :node
path = path[2..-1]
else
path = path[1..-1]
end
end
#puts parsed.inspect
return RelativeLocationPath( path, parsed ) if path.size > 0
end
#RelativeLocationPath
# | Step
# | (AXIS_NAME '::' | '@' | '') AxisSpecifier
# NodeTest
# Predicate
# | '.' | '..' AbbreviatedStep
# | RelativeLocationPath '/' Step
# | RelativeLocationPath '//' Step
AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
def RelativeLocationPath path, parsed
#puts "RelativeLocationPath #{path}"
while path.size > 0
# (axis or @ or <child::>) nodetest predicate >
# OR > / Step
# (. or ..) >
if path[0] == ?.
if path[1] == ?.
parsed << :parent
parsed << :node
path = path[2..-1]
else
parsed << :self
parsed << :node
path = path[1..-1]
end
else
if path[0] == ?@
#puts "ATTRIBUTE"
parsed << :attribute
path = path[1..-1]
# Goto Nodetest
elsif path =~ AXIS
parsed << $1.tr('-','_').intern
path = $'
# Goto Nodetest
else
parsed << :child
end
#puts "NODETESTING '#{path}'"
n = []
path = NodeTest( path, n)
#puts "NODETEST RETURNED '#{path}'"
if path[0] == ?[
path = Predicate( path, n )
end
parsed.concat(n)
end
if path.size > 0
if path[0] == ?/
if path[1] == ?/
parsed << :descendant_or_self
parsed << :node
path = path[2..-1]
else
path = path[1..-1]
end
else
return path
end
end
end
return path
end
# Returns a 1-1 map of the nodeset
# The contents of the resulting array are either:
# true/false, if a positive match
# String, if a name match
#NodeTest
# | ('*' | NCNAME ':' '*' | QNAME) NameTest
# | NODE_TYPE '(' ')' NodeType
# | PI '(' LITERAL ')' PI
# | '[' expr ']' Predicate
NCNAMETEST= /^(#{NCNAME_STR}):\*/u
QNAME = Namespace::NAMESPLIT
NODE_TYPE = /^(comment|text|node)\(\s*\)/m
PI = /^processing-instruction\(/
def NodeTest path, parsed
#puts "NodeTest with #{path}"
res = nil
case path
when /^\*/
path = $'
parsed << :any
when NODE_TYPE
type = $1
path = $'
parsed << type.tr('-', '_').intern
when PI
path = $'
literal = nil
if path !~ /^\s*\)/
path =~ LITERAL
literal = $1
path = $'
raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?)
path = path[1..-1]
end
parsed << :processing_instruction
parsed << (literal || '')
when NCNAMETEST
#puts "NCNAMETEST"
prefix = $1
path = $'
parsed << :namespace
parsed << prefix
when QNAME
#puts "QNAME"
prefix = $1
name = $2
path = $'
prefix = "" unless prefix
parsed << :qname
parsed << prefix
parsed << name
end
return path
end
# Filters the supplied nodeset on the predicate(s)
def Predicate path, parsed
#puts "PREDICATE with #{path}"
return nil unless path[0] == ?[
predicates = []
while path[0] == ?[
path, expr = get_group(path)
predicates << expr[1..-2] if expr
end
#puts "PREDICATES = #{predicates.inspect}"
predicates.each{ |pred|
#puts "ORING #{pred}"
preds = []
parsed << :predicate
parsed << preds
OrExpr(pred, preds)
}
#puts "PREDICATES = #{predicates.inspect}"
path
end
# The following return arrays of true/false, a 1-1 mapping of the
# supplied nodeset, except for axe(), which returns a filtered
# nodeset
#| OrExpr S 'or' S AndExpr
#| AndExpr
def OrExpr path, parsed
#puts "OR >>> #{path}"
n = []
rest = AndExpr( path, n )
#puts "OR <<< #{rest}"
if rest != path
while rest =~ /^\s*( or )/
n = [ :or, n, [] ]
rest = AndExpr( $', n[-1] )
end
end
if parsed.size == 0 and n.size != 0
parsed.replace(n)
elsif n.size > 0
parsed << n
end
rest
end
#| AndExpr S 'and' S EqualityExpr
#| EqualityExpr
def AndExpr path, parsed
#puts "AND >>> #{path}"
n = []
rest = EqualityExpr( path, n )
#puts "AND <<< #{rest}"
if rest != path
while rest =~ /^\s*( and )/
n = [ :and, n, [] ]
#puts "AND >>> #{rest}"
rest = EqualityExpr( $', n[-1] )
#puts "AND <<< #{rest}"
end
end
if parsed.size == 0 and n.size != 0
parsed.replace(n)
elsif n.size > 0
parsed << n
end
rest
end
#| EqualityExpr ('=' | '!=') RelationalExpr
#| RelationalExpr
def EqualityExpr path, parsed
#puts "EQUALITY >>> #{path}"
n = []
rest = RelationalExpr( path, n )
#puts "EQUALITY <<< #{rest}"
if rest != path
while rest =~ /^\s*(!?=)\s*/
if $1[0] == ?!
n = [ :neq, n, [] ]
else
n = [ :eq, n, [] ]
end
rest = RelationalExpr( $', n[-1] )
end
end
if parsed.size == 0 and n.size != 0
parsed.replace(n)
elsif n.size > 0
parsed << n
end
rest
end
#| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
#| AdditiveExpr
def RelationalExpr path, parsed
#puts "RELATION >>> #{path}"
n = []
rest = AdditiveExpr( path, n )
#puts "RELATION <<< #{rest}"
if rest != path
while rest =~ /^\s*([<>]=?)\s*/
if $1[0] == ?<
sym = "lt"
else
sym = "gt"
end
sym << "eq" if $1[-1] == ?=
n = [ sym.intern, n, [] ]
rest = AdditiveExpr( $', n[-1] )
end
end
if parsed.size == 0 and n.size != 0
parsed.replace(n)
elsif n.size > 0
parsed << n
end
rest
end
#| AdditiveExpr ('+' | S '-') MultiplicativeExpr
#| MultiplicativeExpr
def AdditiveExpr path, parsed
#puts "ADDITIVE >>> #{path}"
n = []
rest = MultiplicativeExpr( path, n )
#puts "ADDITIVE <<< #{rest}"
if rest != path
while rest =~ /^\s*(\+| -)\s*/
if $1[0] == ?+
n = [ :plus, n, [] ]
else
n = [ :minus, n, [] ]
end
rest = MultiplicativeExpr( $', n[-1] )
end
end
if parsed.size == 0 and n.size != 0
parsed.replace(n)
elsif n.size > 0
parsed << n
end
rest
end
#| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
#| UnaryExpr
def MultiplicativeExpr path, parsed
#puts "MULT >>> #{path}"
n = []
rest = UnaryExpr( path, n )
#puts "MULT <<< #{rest}"
if rest != path
while rest =~ /^\s*(\*| div | mod )\s*/
if $1[0] == ?*
n = [ :mult, n, [] ]
elsif $1.include?( "div" )
n = [ :div, n, [] ]
else
n = [ :mod, n, [] ]
end
rest = UnaryExpr( $', n[-1] )
end
end
if parsed.size == 0 and n.size != 0
parsed.replace(n)
elsif n.size > 0
parsed << n
end
rest
end
#| '-' UnaryExpr
#| UnionExpr
def UnaryExpr path, parsed
path =~ /^(\-*)/
path = $'
if $1 and (($1.size % 2) != 0)
mult = -1
else
mult = 1
end
parsed << :neg if mult < 0
#puts "UNARY >>> #{path}"
n = []
path = UnionExpr( path, n )
#puts "UNARY <<< #{path}"
parsed.concat( n )
path
end
#| UnionExpr '|' PathExpr
#| PathExpr
def UnionExpr path, parsed
#puts "UNION >>> #{path}"
n = []
rest = PathExpr( path, n )
#puts "UNION <<< #{rest}"
if rest != path
while rest =~ /^\s*(\|)\s*/
n = [ :union, n, [] ]
rest = PathExpr( $', n[-1] )
end
end
if parsed.size == 0 and n.size != 0
parsed.replace( n )
elsif n.size > 0
parsed << n
end
rest
end
#| LocationPath
#| FilterExpr ('/' | '//') RelativeLocationPath
def PathExpr path, parsed
path =~ /^\s*/
path = $'
#puts "PATH >>> #{path}"
n = []
rest = FilterExpr( path, n )
#puts "PATH <<< '#{rest}'"
if rest != path
if rest and rest[0] == ?/
return RelativeLocationPath(rest, n)
end
end
#puts "BEFORE WITH '#{rest}'"
rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w_*]/
parsed.concat(n)
return rest
end
#| FilterExpr Predicate
#| PrimaryExpr
def FilterExpr path, parsed
#puts "FILTER >>> #{path}"
n = []
path = PrimaryExpr( path, n )
#puts "FILTER <<< #{path}"
path = Predicate(path, n) if path and path[0] == ?[
#puts "FILTER <<< #{path}"
parsed.concat(n)
path
end
#| VARIABLE_REFERENCE
#| '(' expr ')'
#| LITERAL
#| NUMBER
#| FunctionCall
VARIABLE_REFERENCE = /^\$(#{NAME_STR})/u
NUMBER = /^(\d*\.?\d+)/
NT = /^comment|text|processing-instruction|node$/
def PrimaryExpr path, parsed
arry = []
case path
when VARIABLE_REFERENCE
varname = $1
path = $'
parsed << :variable
parsed << varname
#arry << @variables[ varname ]
when /^(\w[-\w]*)(?:\()/
#puts "PrimaryExpr :: Function >>> #$1 -- '#$''"
fname = $1
tmp = $'
#puts "#{fname} =~ #{NT.inspect}"
return path if fname =~ NT
path = tmp
parsed << :function
parsed << fname
path = FunctionCall(path, parsed)
when NUMBER
#puts "LITERAL or NUMBER: #$1"
varname = $1.nil? ? $2 : $1
path = $'
parsed << :literal
parsed << (varname.include?('.') ? varname.to_f : varname.to_i)
when LITERAL
#puts "LITERAL or NUMBER: #$1"
varname = $1.nil? ? $2 : $1
path = $'
parsed << :literal
parsed << varname
when /^\(/ #/
path, contents = get_group(path)
contents = contents[1..-2]
n = []
OrExpr( contents, n )
parsed.concat(n)
end
path
end
#| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
def FunctionCall rest, parsed
path, arguments = parse_args(rest)
argset = []
for argument in arguments
args = []
OrExpr( argument, args )
argset << args
end
parsed << argset
path
end
# get_group( '[foo]bar' ) -> ['bar', '[foo]']
def get_group string
ind = 0
depth = 0
st = string[0,1]
en = (st == "(" ? ")" : "]")
begin
case string[ind,1]
when st
depth += 1
when en
depth -= 1
end
ind += 1
end while depth > 0 and ind < string.length
return nil unless depth==0
[string[ind..-1], string[0..ind-1]]
end
def parse_args( string )
arguments = []
ind = 0
inquot = false
inapos = false
depth = 1
begin
case string[ind]
when ?"
inquot = !inquot unless inapos
when ?'
inapos = !inapos unless inquot
else
unless inquot or inapos
case string[ind]
when ?(
depth += 1
if depth == 1
string = string[1..-1]
ind -= 1
end
when ?)
depth -= 1
if depth == 0
s = string[0,ind].strip
arguments << s unless s == ""
string = string[ind+1..-1]
end
when ?,
if depth == 1
s = string[0,ind].strip
arguments << s unless s == ""
string = string[ind+1..-1]
ind = -1
end
end
end
end
ind += 1
end while depth > 0 and ind < string.length
return nil unless depth==0
[string,arguments]
end
end
end
end