Optimizer is broken. :/ (It is not compatible with Ruby 1.9.1; changes are needed.)

pull/2/head v0.0.1
Denis Knauf 2010-03-19 12:11:16 +01:00
parent 203f8103e5
commit 7f98fd7d4b
2 changed files with 161 additions and 157 deletions

View File

@ -45,7 +45,7 @@ class Uri
'userinfo' => 'username ( ":" password ) ?',
'domainlabel' => 'alphadigit ( ( alphadigit | "-" ) * alphadigit ) ?',
'hostname' => 'domainlabel ( "." | domainlabel ) *',
'ipv4' => 'ipv4digits ( "." ipv4digits ) {3}',
'ipv4' => 'ipv4digits ( "." ipv4digits ) {3,3}',
'request_uri' => '( path ? file ) ? ( "?" flags ) ?',
'serv' => 'host ( ":" port ? ) ?',
'pre' => '( ( scheme "://" ) ( auth "@" ) ? serv ) ?',

View File

@ -4,32 +4,32 @@ end
class RegExpr::Segment
attr_accessor :value
def initialize( val) self. value= val end
def to_r() self. value. to_s end
def empty?() self. value. nil? end
def names() @value. names. flatten. compact end
def initialize( val) self.value= val end
def to_r() self.value.to_s end
def empty?() self.value.nil? end
def names() @value.names.flatten.compact end
def optimize
self. value= self. class. optimize self. value
self.value= self.class.optimize self.value
self
end
class <<self
def optimize v
v= v. optimize
v= nil if v && v. empty?
v= v. value[ 0] if v. instance_of?( RegExpr::Block) && v. hidden && v. size == 1
v= v.optimize
v= nil if v and v.empty?
v= v.value[ 0] if RegExpr::Block === v and v.hidden and v.size == 1
v
end
def deepest
self. class_eval do
self.class_eval do
def names() [] end
end
end
def novalue
self. class_eval do
self.class_eval do
def initialize() end
def empty?() false end
def to_r() '' end
@ -38,7 +38,7 @@ class RegExpr::Segment
end
def nooptimize
self. class_eval do
self.class_eval do
def optimize() self end
end
end
@ -48,71 +48,69 @@ end
class RegExpr::Block< RegExpr::Segment
attr_accessor :name, :hidden
def hidden?() @hidden end
def optimize() self. dup. optimize! end
def push( *v) @value. push *v end
def pop() @value. pop end
def empty?() @value. empty? end
def size() @value. size end
def optimize() self.dup.optimize! end
def push( *v) @value.push *v end
def pop() @value.pop end
def empty?() @value.empty? end
def size() @value.size end
def names
names= @value. collect {|v| v. names }
names. push( name) unless self. hidden?
names. flatten. compact
names= @value.collect &:names
names.push( name) unless self.hidden?
names.flatten.compact
end
def initialize *val
val= val[ 0] if val. size == 1 && val[ 0]. instance_of?( Array)
val= val[ 0] if ::Array === val[ 0] and val.size == 1
super val
@hidden= true
end
def optimize!
list, chars= [[]], RegExpr::Chars. new( '')
list, chars= [[]], RegExpr::Chars.new( '')
@value. each do |v|
v= self. class. optimize v
if v.instance_of? RegExpr::Or
list. push []
else list[ -1]. push v
@value.each do |v|
v= self.class.optimize v
if RegExpr::Or === v
list.push []
else list[ -1].push v
end
end
list. delete_if do |v|
if v. size == 1 && ( v[ 0]. instance_of?( RegExpr::Chars) || v[ 0]. instance_of?( RegExpr::Char) )
list.delete_if do |v|
if (RegExpr::Chars === v[ 0] and v.size == 1 ) or RegExpr::Char === v[ 0]
chars+= v[ 0]
else false
end
end
chars= chars. optimize
chars= chars.optimize
values= []
list. each do |v|
values. push RegExpr::Or. new
values+= if v. size == 1 &&
v[ 0]. instance_of?( RegExpr::Block) &&
v[ 0]. hidden
v[ 0]. value
list.each do |v|
values.push RegExpr::Or.new
values+= if v.size == 1 and RegExpr::Block === v[ 0] and v[ 0].hidden
v[ 0].value
else
v. collect do |w|
if w. instance_of?( RegExpr::Block) && w. hidden
v.collect do |w|
if RegExpr::Block === w and w.hidden
u= false
w. value. each do |i|
break unless u||= i. instance_of?( RegExpr::Or)
w.value.each do |i|
break unless u||= RegExpr::Or === i
end
u ? w : w. value
u ? w : w.value
else w
end
end. flatten
end.flatten
end
end
values.push RegExpr::Or. new, chars if chars. size > 0
values. shift
values.push RegExpr::Or.new, chars if chars.size > 0
values.shift
@value= values
self
end
def to_r()
(@hidden ? '(?:%s)' : '(%s)')% @value. collect {|i| i.to_r }. join( '')
(@hidden ? '(?:%s)' : '(%s)')% @value.collect( &:to_r).join( '')
end
end
@ -120,9 +118,9 @@ class RegExpr::Not< RegExpr::Segment
deepest
novalue
def to_r
if @value. instance_of? RegExpr::Chars
@value. not!
@value. to_s
if @value.instance_of? RegExpr::Chars
@value.not!
@value.to_s
else '(?!%s)'% @value
end
end
@ -132,8 +130,8 @@ class RegExpr::Range< RegExpr::Segment
novalue
attr_accessor :v1, :v2
def names() [] end
def optimize() self. value. optimize end
def to_r() self. optimize. to_r end
def optimize() self.value.optimize end
def to_r() self.optimize.to_r end
def initialize( v1, v2) @v1, @v2= v1, v2 end
# algo stolen from thomas leitner
@ -141,28 +139,28 @@ class RegExpr::Range< RegExpr::Segment
a, b= @v1< @v2 ? [ @v1, @v2] : [ @v2, @v1]
arr= Array[ a]
af= a == 0 ? 1.0 : a. to_f
bf= b == 0 ? 1.0 : b. to_f
1. upto( b. to_s. length- 1) do |i|
af= a == 0 ? 1.0 : a.to_f
bf= b == 0 ? 1.0 : b.to_f
1.upto( b.to_s.length- 1) do |i|
pot= 10** i
num= (af/ pot). ceil* pot # next higher number with i zeros
arr. insert i, num if num < @v2
num= (bf/ pot). floor* pot # next lower number with i zeros
arr. insert -i, num
num= (af/ pot).ceil* pot # next higher number with i zeros
arr.insert i, num if num < @v2
num= (bf/ pot).floor* pot # next lower number with i zeros
arr.insert -i, num
end
arr. uniq!
arr. push b+ 1 # +1 -> to handle it in the same way as the other elements
arr.uniq!
arr.push b+ 1 # +1 -> to handle it in the same way as the other elements
result= RegExpr::Block. new
0. upto( arr. length- 2) do |i|
first= arr[ i]. to_s
result= RegExpr::Block.new
0.upto( arr. length- 2) do |i|
first= arr[ i].to_s
second= (arr[ i+ 1]- 1).to_s
result. push RegExpr::Or. new
0. upto( first. length- 1) do |j|
result. push( if first[ j] == second[ j]
RegExpr::Char. new first[ j]. chr
result.push RegExpr::Or.new
0.upto( first.length- 1) do |j|
result.push( if first[ j] == second[ j]
RegExpr::Char.new first[ j].chr
else
RegExpr::Chars. new '%c-%c'% [ first[ j], second[ j] ]
RegExpr::Chars.new '%c-%c'% [ first[ j], second[ j] ]
end)
end
end
@ -178,7 +176,7 @@ class RegExpr::Chars< RegExpr::Segment
def not?() @not end
def empty?() @chars. empty? end
def size() @chars. size end
def value=( val) @chars= (@not= val[ 0] == ?^) ? val[ 1.. -1] : val end
def value=( val) @chars= (@not= val[ 0] == ?^) ? val[ 1.. -1] : val ; val end
def value() (self. not? ? '^' : '')+ (@chars) end
def not!() @not= !@not end
alias -@ not!
@ -186,12 +184,12 @@ class RegExpr::Chars< RegExpr::Segment
def split
chars= []
@chars. gsub( /\\-/) do |r|
chars. push ?-
chars. push ?-.ord
nil
end. gsub( /.-./) do |r|
chars+= ((r[ 0] .. r[ 2]). to_a)
chars+= (r[ 0].ord .. r[ 2].ord). to_a
nil
end. each_byte do |c|
end. bytes. each do |c|
chars. push c
end
chars
@ -199,46 +197,47 @@ class RegExpr::Chars< RegExpr::Segment
def optimize!
b2chr= lambda do |b|
"-[]".include?( b.chr) ? '\%c'% b : b. chr
b = b.chr
"-[]".include?( b) ? '\%c'% b : b
end
chars= self. chars. bytes. sort. uniq
$stderr.puts chars.inspect
chars= self. split. sort. uniq
@chars= ''
return self if chars. empty?
b= chars. shift
chars. each do |i|
return self if chars.empty?
b= chars.shift
chars.each do |i|
if b+1 == i
unless @chars[ -1] == ?- && @chars[-2] != ?\\
@chars+= b2chr. call( b)+ '-'
unless @chars[ -1] == ?- and @chars[-2] != ?\\
@chars+= b2chr.call( b)+ '-'
end
else @chars+= b2chr. call b
else @chars+= b2chr.call b
end
b= i
end
@chars+= b2chr. call b
@chars+= b2chr.call b
self
end
def optimize
n= self. dup. optimize!
if (n. size == 1 || (n. size == 2 && n. value[ 0] == ?\\ )) && ! n. not?
RegExpr::Char. new n. chars[ -1]. chr
else n
n= self.dup.optimize!
if (n.size == 1 or (n.size == 2 and n.value[ 0] == ?\\ )) and not n.not?
RegExpr::Char.new n.chars[ -1]
else
n
end
end
def + b
chars= self. not? ? '^' : ''
chars+= if b. instance_of? RegExpr::Char
self. split.push b. value[ 0]
elsif self. not? == b. not?
self. split+ b. split
elsif self. not?
(0.. 255). to_a- self. split+ b. split
chars= self.not? ? '^' : ''
chars+= if b.instance_of? RegExpr::Char
self.split.push b.value[ 0]
elsif self.not? == b.not?
self.split+ b.split
elsif self.not?
(0..255).to_a- self.split+ b.split
else
(0.. 255). to_a- b. split+ self. split
end. compact. uniq. collect {|i| i. chr }. join( '')
self. class. new chars
(0..255).to_a- b.split+ self.split
end.compact.uniq.collect {|i| i.chr }.join( '')
self.class.new chars
end
end
@ -248,36 +247,38 @@ class RegExpr::Repeat< RegExpr::Segment
def minandmax x
case x
when nil, '' then nil
else x. to_i
else x.to_i
end
end
def optimize
super
min == 1 && max == 1 ? @value : self
r = (min == 1 and max == 1) ? @value : self
r
end
def initialize value, min= 1, max= min
super value
@min, @max= self. minandmax( min), self. minandmax( max)
@min, @max= self.minandmax( min), self.minandmax( max)
end
def to_r
t= '{%s,%s}'% [ @min||'', @max||'' ]
return '' if '{0,0}' == t
t= Hash[ *%w<{,1} ? {0,1} ? {0,} * {,} * {1,} +>+ ['{1,1}', ''] ][ t]|| t
@value. to_r+ t
@value.to_r+ t
end
end
class RegExpr::Char< RegExpr::Segment
deepest
nooptimize
def to_r() ::Regexp. quote @value end
def to_r() ::Regexp.quote @value end
def size() 1 end
def self. new x
x= x. split( ''). collect {|i| super i }
x. size == 1 ? x[ 0] : RegExpr::Block. new( x)
def self.new x
x= x.split( '').collect {|i| super i }
x.size == 1 ? x[ 0] : RegExpr::Block.new( x)
end
end
@ -331,128 +332,131 @@ class RegExpr
def [] *vals
ret= super *vals
STDEXP. each {|k, v| ret[ k]||= v }
STDEXP.each {|k, v| ret[ k]||= v }
ret
end
def new *vals
ret= super *vals
STDEXP. each {|k, v| ret[ k]||= v }
STDEXP.each {|k, v| ret[ k]||= v }
ret
end
end
def to_r exp= :main
r = self. to_re( exp). optimize
h, r = r. hidden?, r. to_r
r = self.to_re( exp)
#r.optimize!
h, r = r.hidden?, r.to_r
r = r[ 1...-1] unless h
::Regexp. new r
::Regexp.new r
end
def to_re exp= :main
u= RegExpr::Block. new
t, u. hidden= if exp. instance_of? Symbol
u. name= exp. to_sym
u= RegExpr::Block.new
t, u.hidden= if Symbol === exp
u.name= exp.to_sym
if self[ exp]
[ self[ exp], false]
else [ self[ exp. to_s], true]
else [ self[ exp.to_s], true]
end
else [ exp. to_s, true]
else [ exp.to_s, true]
end
until !t || t. empty?
v, t= self. to_r_next t
until !t or t.empty?
v, t= self.to_r_next t
case v
when ')' then return u, t
when RegExpr::Repeat then v. value= u. pop
when RegExpr::Repeat then v.value= u.pop
end
u. push v
u.push v
end
u
end
def to_r_next exp
exp. strip!
/^/. match exp[ 1.. -1]
exp.strip!
/^/ =~ exp[ 1.. -1]
t= case exp[ 0]
when ?^ then return RegExpr::Begin. new, exp[ 1.. -1]
when ?$ then return RegExpr::End. new, exp[ 1.. -1]
when ?^ then return RegExpr::Begin.new, exp[ 1.. -1]
when ?$ then return RegExpr::End.new, exp[ 1.. -1]
when ?\\
h= case exp[ 1]
when ?D, ?S, ?W, ?a, ?d.. ?f, ?n, ?r.. ?t, ?v, ?w
return RegExpr::WildCard. new( '\%c'% exp[ 1]), exp[ 2.. -1]
return RegExpr::WildCard.new( '\%c'% exp[ 1]), exp[ 2.. -1]
when ?x then 16
when ?o then 8
when ?b then 2
when ?0.. ?9
exp= 'XX'+ exp[ 1.. -1]
10
else Kernel. raise ArgumentError, 'Unknown form "%s"'% exp
else raise ArgumentError, 'Unknown form "%s"'% exp
end
i= exp[ 2.. -1]. to_i h
return RegExpr::Char. new( i.chr), exp[ (i. to_s( h). size+ 2).. -1]
i= exp[ 2.. -1].to_i h
return RegExpr::Char.new( i.chr), exp[ (i.to_s( h). size+ 2).. -1]
when ?. then return RegExpr::WildCard. new( '.'), exp[ 1.. -1]
when ?. then return RegExpr::WildCard.new( '.'), exp[ 1.. -1]
when ?0
case exp[ 1]
when ?x then %r<^0x([0-9a-f]+)>i. match exp
return '', $1. to_i( 16). to_s+ $'
when ?o then %r<^0o([0-8]+)>. match exp
return '', $1. to_i( 8). to_s+ $'
when ?b then %r<^0b([01]+)>. match exp
return '', $1. to_i( 2). to_s+ $'
when ?x then %r<^0x([0-9a-f]+)>i.match exp
return '', $1.to_i( 16).to_s+ $'
when ?o then %r<^0o([0-8]+)>.match exp
return '', $1.to_i( 8).to_s+ $'
when ?b then %r<^0b([01]+)>.match exp
return '', $1.to_i( 2).to_s+ $'
else
case exp
when %r<(\d+)..(\d+)> then RegExpr::Range. new $1. to_i, $2. to_i
when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat. new '', *$1. split( ',')
else Kernel. raise ArgumentError, 'Unknown form "%s"'% exp
when %r<(\d+)..(\d+)> then RegExpr::Range.new $1.to_i, $2.to_i
when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat.new '', *$1.split( ',')
else raise ArgumentError, 'Unknown form "%s"'% exp
end
end
when ?( then return self. to_re( exp[ 1.. -1])
when ?( then return self.to_re( exp[ 1.. -1])
when ?) then ')'
when ?| then RegExpr::Or. new
when ?| then RegExpr::Or.new
when ?+ then RegExpr::Repeat. new '', 1, nil
when ?* then RegExpr::Repeat. new '', nil
when ?? then RegExpr::Repeat. new '', 0, 1
when ?+ then RegExpr::Repeat.new '', 1, nil
when ?* then RegExpr::Repeat.new '', nil
when ?? then RegExpr::Repeat.new '', 0, 1
when ?" then RegExpr::Char. new %r<^"((?:[^"]|\\")*)">. match( exp)[ 1]
when ?[ then RegExpr::Chars. new %r<^\[((?:[^\]]|\\\])*[^\\]|)\]>. match( exp)[ 1]
when ?" then RegExpr::Char.new %r<^"((?:[^"]|\\")*)">.match( exp)[ 1]
when ?[ then RegExpr::Chars.new %r<^\[((?:[^\]]|\\\])*[^\\]|)\]>.match( exp)[ 1]
when ?/ then exp =~ %r<^/((?:[^/]|\\/)*)/(im?|mi)?>
RegExpr::Regexp. new ::Regexp. new( $1,
RegExpr::Regexp.new ::Regexp.new( $1,
($2 =~ /i/ ? ::Regexp::IGNORECASE : 0)+
($2 =~ /m/ ? ::Regexp::MULTILINE : 0))
else
case exp
when %r<^([a-z_][a-z_0-9]*\b)>i then self. to_re $1. to_sym
when %r<(\d+)..(\d+)> then RegExpr::Range. new $1. to_i, $2. to_i
when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat. new '', *$1. split( ',')
else Kernel. raise ArgumentError, 'Unknown form "%s"'% exp
when %r<^([a-z_][a-z_0-9]*\b)>i then self.to_re $1.to_sym
when %r<(\d+)..(\d+)> then RegExpr::Range.new $1.to_i, $2.to_i
when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat.new '', *$1.split( ',')
else raise ArgumentError, 'Unknown form "%s"'% exp
end
end
[ t, $' ]
end
def def cl= Class. new, *exp
exp= [ :main ] if exp. empty?
exp. each do |e|
re= self. to_re e
names= re. names. collect {|n| '@%s'% n }. join ', '
re= ::Regexp. new '^%s$'% re. optimize. to_r
def def cl= Class.new, *exp
exp= [ :main ] if exp.empty?
exp.each do |e|
re= self.to_re e
names= re.names.collect('@%s'.method(:%)).join ', '
re= ::Regexp.new '^%s$'% re.to_r
ev= <<-EOF
def #{e}= val
m= #{re. inspect}. match val
raise ArgumentError, 'Unallowed Chars! (%s =~ #{re. inspect})'% val. inspect unless m
m= #{re.inspect}. match val
raise ArgumentError, 'Unallowed Chars! (%s =~ #{re.inspect})'% val. inspect unless m
#{names}= *m[ 1.. -1]
end
EOF
cl. class_eval ev
cl.class_eval ev
end
cl
end
def match( m, exp= :main) self. to_r( exp). match m end
def match( m, exp= :main) to_r( exp).match m end
def =~( x) to_r =~ x end
end