Optimizer broken. :/ (not compatible with Ruby 1.9.1, changes needed)

This commit is contained in:
Denis Knauf 2010-03-19 12:11:16 +01:00
parent 203f8103e5
commit 7f98fd7d4b
2 changed files with 161 additions and 157 deletions

View file

@ -45,7 +45,7 @@ class Uri
'userinfo' => 'username ( ":" password ) ?', 'userinfo' => 'username ( ":" password ) ?',
'domainlabel' => 'alphadigit ( ( alphadigit | "-" ) * alphadigit ) ?', 'domainlabel' => 'alphadigit ( ( alphadigit | "-" ) * alphadigit ) ?',
'hostname' => 'domainlabel ( "." | domainlabel ) *', 'hostname' => 'domainlabel ( "." | domainlabel ) *',
'ipv4' => 'ipv4digits ( "." ipv4digits ) {3}', 'ipv4' => 'ipv4digits ( "." ipv4digits ) {3,3}',
'request_uri' => '( path ? file ) ? ( "?" flags ) ?', 'request_uri' => '( path ? file ) ? ( "?" flags ) ?',
'serv' => 'host ( ":" port ? ) ?', 'serv' => 'host ( ":" port ? ) ?',
'pre' => '( ( scheme "://" ) ( auth "@" ) ? serv ) ?', 'pre' => '( ( scheme "://" ) ( auth "@" ) ? serv ) ?',

View file

@ -4,32 +4,32 @@ end
class RegExpr::Segment class RegExpr::Segment
attr_accessor :value attr_accessor :value
def initialize( val) self. value= val end def initialize( val) self.value= val end
def to_r() self. value. to_s end def to_r() self.value.to_s end
def empty?() self. value. nil? end def empty?() self.value.nil? end
def names() @value. names. flatten. compact end def names() @value.names.flatten.compact end
def optimize def optimize
self. value= self. class. optimize self. value self.value= self.class.optimize self.value
self self
end end
class <<self class <<self
def optimize v def optimize v
v= v. optimize v= v.optimize
v= nil if v && v. empty? v= nil if v and v.empty?
v= v. value[ 0] if v. instance_of?( RegExpr::Block) && v. hidden && v. size == 1 v= v.value[ 0] if RegExpr::Block === v and v.hidden and v.size == 1
v v
end end
def deepest def deepest
self. class_eval do self.class_eval do
def names() [] end def names() [] end
end end
end end
def novalue def novalue
self. class_eval do self.class_eval do
def initialize() end def initialize() end
def empty?() false end def empty?() false end
def to_r() '' end def to_r() '' end
@ -38,7 +38,7 @@ class RegExpr::Segment
end end
def nooptimize def nooptimize
self. class_eval do self.class_eval do
def optimize() self end def optimize() self end
end end
end end
@ -48,71 +48,69 @@ end
class RegExpr::Block< RegExpr::Segment class RegExpr::Block< RegExpr::Segment
attr_accessor :name, :hidden attr_accessor :name, :hidden
def hidden?() @hidden end def hidden?() @hidden end
def optimize() self. dup. optimize! end def optimize() self.dup.optimize! end
def push( *v) @value. push *v end def push( *v) @value.push *v end
def pop() @value. pop end def pop() @value.pop end
def empty?() @value. empty? end def empty?() @value.empty? end
def size() @value. size end def size() @value.size end
def names def names
names= @value. collect {|v| v. names } names= @value.collect &:names
names. push( name) unless self. hidden? names.push( name) unless self.hidden?
names. flatten. compact names.flatten.compact
end end
def initialize *val def initialize *val
val= val[ 0] if val. size == 1 && val[ 0]. instance_of?( Array) val= val[ 0] if ::Array === val[ 0] and val.size == 1
super val super val
@hidden= true @hidden= true
end end
def optimize! def optimize!
list, chars= [[]], RegExpr::Chars. new( '') list, chars= [[]], RegExpr::Chars.new( '')
@value. each do |v| @value.each do |v|
v= self. class. optimize v v= self.class.optimize v
if v.instance_of? RegExpr::Or if RegExpr::Or === v
list. push [] list.push []
else list[ -1]. push v else list[ -1].push v
end end
end end
list. delete_if do |v| list.delete_if do |v|
if v. size == 1 && ( v[ 0]. instance_of?( RegExpr::Chars) || v[ 0]. instance_of?( RegExpr::Char) ) if (RegExpr::Chars === v[ 0] and v.size == 1 ) or RegExpr::Char === v[ 0]
chars+= v[ 0] chars+= v[ 0]
else false else false
end end
end end
chars= chars. optimize chars= chars.optimize
values= [] values= []
list. each do |v| list.each do |v|
values. push RegExpr::Or. new values.push RegExpr::Or.new
values+= if v. size == 1 && values+= if v.size == 1 and RegExpr::Block === v[ 0] and v[ 0].hidden
v[ 0]. instance_of?( RegExpr::Block) && v[ 0].value
v[ 0]. hidden
v[ 0]. value
else else
v. collect do |w| v.collect do |w|
if w. instance_of?( RegExpr::Block) && w. hidden if RegExpr::Block === w and w.hidden
u= false u= false
w. value. each do |i| w.value.each do |i|
break unless u||= i. instance_of?( RegExpr::Or) break unless u||= RegExpr::Or === i
end end
u ? w : w. value u ? w : w.value
else w else w
end end
end. flatten end.flatten
end end
end end
values.push RegExpr::Or. new, chars if chars. size > 0 values.push RegExpr::Or.new, chars if chars.size > 0
values. shift values.shift
@value= values @value= values
self self
end end
def to_r() def to_r()
(@hidden ? '(?:%s)' : '(%s)')% @value. collect {|i| i.to_r }. join( '') (@hidden ? '(?:%s)' : '(%s)')% @value.collect( &:to_r).join( '')
end end
end end
@ -120,9 +118,9 @@ class RegExpr::Not< RegExpr::Segment
deepest deepest
novalue novalue
def to_r def to_r
if @value. instance_of? RegExpr::Chars if @value.instance_of? RegExpr::Chars
@value. not! @value.not!
@value. to_s @value.to_s
else '(?!%s)'% @value else '(?!%s)'% @value
end end
end end
@ -132,8 +130,8 @@ class RegExpr::Range< RegExpr::Segment
novalue novalue
attr_accessor :v1, :v2 attr_accessor :v1, :v2
def names() [] end def names() [] end
def optimize() self. value. optimize end def optimize() self.value.optimize end
def to_r() self. optimize. to_r end def to_r() self.optimize.to_r end
def initialize( v1, v2) @v1, @v2= v1, v2 end def initialize( v1, v2) @v1, @v2= v1, v2 end
# algo stolen from thomas leitner # algo stolen from thomas leitner
@ -141,28 +139,28 @@ class RegExpr::Range< RegExpr::Segment
a, b= @v1< @v2 ? [ @v1, @v2] : [ @v2, @v1] a, b= @v1< @v2 ? [ @v1, @v2] : [ @v2, @v1]
arr= Array[ a] arr= Array[ a]
af= a == 0 ? 1.0 : a. to_f af= a == 0 ? 1.0 : a.to_f
bf= b == 0 ? 1.0 : b. to_f bf= b == 0 ? 1.0 : b.to_f
1. upto( b. to_s. length- 1) do |i| 1.upto( b.to_s.length- 1) do |i|
pot= 10** i pot= 10** i
num= (af/ pot). ceil* pot # next higher number with i zeros num= (af/ pot).ceil* pot # next higher number with i zeros
arr. insert i, num if num < @v2 arr.insert i, num if num < @v2
num= (bf/ pot). floor* pot # next lower number with i zeros num= (bf/ pot).floor* pot # next lower number with i zeros
arr. insert -i, num arr.insert -i, num
end end
arr. uniq! arr.uniq!
arr. push b+ 1 # +1 -> to handle it in the same way as the other elements arr.push b+ 1 # +1 -> to handle it in the same way as the other elements
result= RegExpr::Block. new result= RegExpr::Block.new
0. upto( arr. length- 2) do |i| 0.upto( arr. length- 2) do |i|
first= arr[ i]. to_s first= arr[ i].to_s
second= (arr[ i+ 1]- 1).to_s second= (arr[ i+ 1]- 1).to_s
result. push RegExpr::Or. new result.push RegExpr::Or.new
0. upto( first. length- 1) do |j| 0.upto( first.length- 1) do |j|
result. push( if first[ j] == second[ j] result.push( if first[ j] == second[ j]
RegExpr::Char. new first[ j]. chr RegExpr::Char.new first[ j].chr
else else
RegExpr::Chars. new '%c-%c'% [ first[ j], second[ j] ] RegExpr::Chars.new '%c-%c'% [ first[ j], second[ j] ]
end) end)
end end
end end
@ -178,7 +176,7 @@ class RegExpr::Chars< RegExpr::Segment
def not?() @not end def not?() @not end
def empty?() @chars. empty? end def empty?() @chars. empty? end
def size() @chars. size end def size() @chars. size end
def value=( val) @chars= (@not= val[ 0] == ?^) ? val[ 1.. -1] : val end def value=( val) @chars= (@not= val[ 0] == ?^) ? val[ 1.. -1] : val ; val end
def value() (self. not? ? '^' : '')+ (@chars) end def value() (self. not? ? '^' : '')+ (@chars) end
def not!() @not= !@not end def not!() @not= !@not end
alias -@ not! alias -@ not!
@ -186,12 +184,12 @@ class RegExpr::Chars< RegExpr::Segment
def split def split
chars= [] chars= []
@chars. gsub( /\\-/) do |r| @chars. gsub( /\\-/) do |r|
chars. push ?- chars. push ?-.ord
nil nil
end. gsub( /.-./) do |r| end. gsub( /.-./) do |r|
chars+= ((r[ 0] .. r[ 2]). to_a) chars+= (r[ 0].ord .. r[ 2].ord). to_a
nil nil
end. each_byte do |c| end. bytes. each do |c|
chars. push c chars. push c
end end
chars chars
@ -199,46 +197,47 @@ class RegExpr::Chars< RegExpr::Segment
def optimize! def optimize!
b2chr= lambda do |b| b2chr= lambda do |b|
"-[]".include?( b.chr) ? '\%c'% b : b. chr b = b.chr
"-[]".include?( b) ? '\%c'% b : b
end end
chars= self. chars. bytes. sort. uniq chars= self. split. sort. uniq
$stderr.puts chars.inspect
@chars= '' @chars= ''
return self if chars. empty? return self if chars.empty?
b= chars. shift b= chars.shift
chars. each do |i| chars.each do |i|
if b+1 == i if b+1 == i
unless @chars[ -1] == ?- && @chars[-2] != ?\\ unless @chars[ -1] == ?- and @chars[-2] != ?\\
@chars+= b2chr. call( b)+ '-' @chars+= b2chr.call( b)+ '-'
end end
else @chars+= b2chr. call b else @chars+= b2chr.call b
end end
b= i b= i
end end
@chars+= b2chr. call b @chars+= b2chr.call b
self self
end end
def optimize def optimize
n= self. dup. optimize! n= self.dup.optimize!
if (n. size == 1 || (n. size == 2 && n. value[ 0] == ?\\ )) && ! n. not? if (n.size == 1 or (n.size == 2 and n.value[ 0] == ?\\ )) and not n.not?
RegExpr::Char. new n. chars[ -1]. chr RegExpr::Char.new n.chars[ -1]
else n else
n
end end
end end
def + b def + b
chars= self. not? ? '^' : '' chars= self.not? ? '^' : ''
chars+= if b. instance_of? RegExpr::Char chars+= if b.instance_of? RegExpr::Char
self. split.push b. value[ 0] self.split.push b.value[ 0]
elsif self. not? == b. not? elsif self.not? == b.not?
self. split+ b. split self.split+ b.split
elsif self. not? elsif self.not?
(0.. 255). to_a- self. split+ b. split (0..255).to_a- self.split+ b.split
else else
(0.. 255). to_a- b. split+ self. split (0..255).to_a- b.split+ self.split
end. compact. uniq. collect {|i| i. chr }. join( '') end.compact.uniq.collect {|i| i.chr }.join( '')
self. class. new chars self.class.new chars
end end
end end
@ -248,36 +247,38 @@ class RegExpr::Repeat< RegExpr::Segment
def minandmax x def minandmax x
case x case x
when nil, '' then nil when nil, '' then nil
else x. to_i else x.to_i
end end
end end
def optimize def optimize
super super
min == 1 && max == 1 ? @value : self r = (min == 1 and max == 1) ? @value : self
r
end end
def initialize value, min= 1, max= min def initialize value, min= 1, max= min
super value super value
@min, @max= self. minandmax( min), self. minandmax( max) @min, @max= self.minandmax( min), self.minandmax( max)
end end
def to_r def to_r
t= '{%s,%s}'% [ @min||'', @max||'' ] t= '{%s,%s}'% [ @min||'', @max||'' ]
return '' if '{0,0}' == t
t= Hash[ *%w<{,1} ? {0,1} ? {0,} * {,} * {1,} +>+ ['{1,1}', ''] ][ t]|| t t= Hash[ *%w<{,1} ? {0,1} ? {0,} * {,} * {1,} +>+ ['{1,1}', ''] ][ t]|| t
@value. to_r+ t @value.to_r+ t
end end
end end
class RegExpr::Char< RegExpr::Segment class RegExpr::Char< RegExpr::Segment
deepest deepest
nooptimize nooptimize
def to_r() ::Regexp. quote @value end def to_r() ::Regexp.quote @value end
def size() 1 end def size() 1 end
def self. new x def self.new x
x= x. split( ''). collect {|i| super i } x= x.split( '').collect {|i| super i }
x. size == 1 ? x[ 0] : RegExpr::Block. new( x) x.size == 1 ? x[ 0] : RegExpr::Block.new( x)
end end
end end
@ -331,128 +332,131 @@ class RegExpr
def [] *vals def [] *vals
ret= super *vals ret= super *vals
STDEXP. each {|k, v| ret[ k]||= v } STDEXP.each {|k, v| ret[ k]||= v }
ret ret
end end
def new *vals def new *vals
ret= super *vals ret= super *vals
STDEXP. each {|k, v| ret[ k]||= v } STDEXP.each {|k, v| ret[ k]||= v }
ret ret
end end
end end
def to_r exp= :main def to_r exp= :main
r = self. to_re( exp). optimize r = self.to_re( exp)
h, r = r. hidden?, r. to_r #r.optimize!
h, r = r.hidden?, r.to_r
r = r[ 1...-1] unless h r = r[ 1...-1] unless h
::Regexp. new r ::Regexp.new r
end end
def to_re exp= :main def to_re exp= :main
u= RegExpr::Block. new u= RegExpr::Block.new
t, u. hidden= if exp. instance_of? Symbol t, u.hidden= if Symbol === exp
u. name= exp. to_sym u.name= exp.to_sym
if self[ exp] if self[ exp]
[ self[ exp], false] [ self[ exp], false]
else [ self[ exp. to_s], true] else [ self[ exp.to_s], true]
end end
else [ exp. to_s, true] else [ exp.to_s, true]
end end
until !t || t. empty? until !t or t.empty?
v, t= self. to_r_next t v, t= self.to_r_next t
case v case v
when ')' then return u, t when ')' then return u, t
when RegExpr::Repeat then v. value= u. pop when RegExpr::Repeat then v.value= u.pop
end end
u. push v u.push v
end end
u u
end end
def to_r_next exp def to_r_next exp
exp. strip! exp.strip!
/^/. match exp[ 1.. -1] /^/ =~ exp[ 1.. -1]
t= case exp[ 0] t= case exp[ 0]
when ?^ then return RegExpr::Begin. new, exp[ 1.. -1] when ?^ then return RegExpr::Begin.new, exp[ 1.. -1]
when ?$ then return RegExpr::End. new, exp[ 1.. -1] when ?$ then return RegExpr::End.new, exp[ 1.. -1]
when ?\\ when ?\\
h= case exp[ 1] h= case exp[ 1]
when ?D, ?S, ?W, ?a, ?d.. ?f, ?n, ?r.. ?t, ?v, ?w when ?D, ?S, ?W, ?a, ?d.. ?f, ?n, ?r.. ?t, ?v, ?w
return RegExpr::WildCard. new( '\%c'% exp[ 1]), exp[ 2.. -1] return RegExpr::WildCard.new( '\%c'% exp[ 1]), exp[ 2.. -1]
when ?x then 16 when ?x then 16
when ?o then 8 when ?o then 8
when ?b then 2 when ?b then 2
when ?0.. ?9 when ?0.. ?9
exp= 'XX'+ exp[ 1.. -1] exp= 'XX'+ exp[ 1.. -1]
10 10
else Kernel. raise ArgumentError, 'Unknown form "%s"'% exp else raise ArgumentError, 'Unknown form "%s"'% exp
end end
i= exp[ 2.. -1]. to_i h i= exp[ 2.. -1].to_i h
return RegExpr::Char. new( i.chr), exp[ (i. to_s( h). size+ 2).. -1] return RegExpr::Char.new( i.chr), exp[ (i.to_s( h). size+ 2).. -1]
when ?. then return RegExpr::WildCard. new( '.'), exp[ 1.. -1] when ?. then return RegExpr::WildCard.new( '.'), exp[ 1.. -1]
when ?0 when ?0
case exp[ 1] case exp[ 1]
when ?x then %r<^0x([0-9a-f]+)>i. match exp when ?x then %r<^0x([0-9a-f]+)>i.match exp
return '', $1. to_i( 16). to_s+ $' return '', $1.to_i( 16).to_s+ $'
when ?o then %r<^0o([0-8]+)>. match exp when ?o then %r<^0o([0-8]+)>.match exp
return '', $1. to_i( 8). to_s+ $' return '', $1.to_i( 8).to_s+ $'
when ?b then %r<^0b([01]+)>. match exp when ?b then %r<^0b([01]+)>.match exp
return '', $1. to_i( 2). to_s+ $' return '', $1.to_i( 2).to_s+ $'
else else
case exp case exp
when %r<(\d+)..(\d+)> then RegExpr::Range. new $1. to_i, $2. to_i when %r<(\d+)..(\d+)> then RegExpr::Range.new $1.to_i, $2.to_i
when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat. new '', *$1. split( ',') when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat.new '', *$1.split( ',')
else Kernel. raise ArgumentError, 'Unknown form "%s"'% exp else raise ArgumentError, 'Unknown form "%s"'% exp
end end
end end
when ?( then return self. to_re( exp[ 1.. -1]) when ?( then return self.to_re( exp[ 1.. -1])
when ?) then ')' when ?) then ')'
when ?| then RegExpr::Or. new when ?| then RegExpr::Or.new
when ?+ then RegExpr::Repeat. new '', 1, nil when ?+ then RegExpr::Repeat.new '', 1, nil
when ?* then RegExpr::Repeat. new '', nil when ?* then RegExpr::Repeat.new '', nil
when ?? then RegExpr::Repeat. new '', 0, 1 when ?? then RegExpr::Repeat.new '', 0, 1
when ?" then RegExpr::Char. new %r<^"((?:[^"]|\\")*)">. match( exp)[ 1] when ?" then RegExpr::Char.new %r<^"((?:[^"]|\\")*)">.match( exp)[ 1]
when ?[ then RegExpr::Chars. new %r<^\[((?:[^\]]|\\\])*[^\\]|)\]>. match( exp)[ 1] when ?[ then RegExpr::Chars.new %r<^\[((?:[^\]]|\\\])*[^\\]|)\]>.match( exp)[ 1]
when ?/ then exp =~ %r<^/((?:[^/]|\\/)*)/(im?|mi)?> when ?/ then exp =~ %r<^/((?:[^/]|\\/)*)/(im?|mi)?>
RegExpr::Regexp. new ::Regexp. new( $1, RegExpr::Regexp.new ::Regexp.new( $1,
($2 =~ /i/ ? ::Regexp::IGNORECASE : 0)+ ($2 =~ /i/ ? ::Regexp::IGNORECASE : 0)+
($2 =~ /m/ ? ::Regexp::MULTILINE : 0)) ($2 =~ /m/ ? ::Regexp::MULTILINE : 0))
else else
case exp case exp
when %r<^([a-z_][a-z_0-9]*\b)>i then self. to_re $1. to_sym when %r<^([a-z_][a-z_0-9]*\b)>i then self.to_re $1.to_sym
when %r<(\d+)..(\d+)> then RegExpr::Range. new $1. to_i, $2. to_i when %r<(\d+)..(\d+)> then RegExpr::Range.new $1.to_i, $2.to_i
when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat. new '', *$1. split( ',') when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat.new '', *$1.split( ',')
else Kernel. raise ArgumentError, 'Unknown form "%s"'% exp else raise ArgumentError, 'Unknown form "%s"'% exp
end end
end end
[ t, $' ] [ t, $' ]
end end
def def cl= Class. new, *exp def def cl= Class.new, *exp
exp= [ :main ] if exp. empty? exp= [ :main ] if exp.empty?
exp. each do |e| exp.each do |e|
re= self. to_re e re= self.to_re e
names= re. names. collect {|n| '@%s'% n }. join ', ' names= re.names.collect('@%s'.method(:%)).join ', '
re= ::Regexp. new '^%s$'% re. optimize. to_r re= ::Regexp.new '^%s$'% re.to_r
ev= <<-EOF ev= <<-EOF
def #{e}= val def #{e}= val
m= #{re. inspect}. match val m= #{re.inspect}. match val
raise ArgumentError, 'Unallowed Chars! (%s =~ #{re. inspect})'% val. inspect unless m raise ArgumentError, 'Unallowed Chars! (%s =~ #{re.inspect})'% val. inspect unless m
#{names}= *m[ 1.. -1] #{names}= *m[ 1.. -1]
end end
EOF EOF
cl. class_eval ev cl.class_eval ev
end end
cl cl
end end
def match( m, exp= :main) self. to_r( exp). match m end def match( m, exp= :main) to_r( exp).match m end
def =~( x) to_r =~ x end
end end