Optimizer broken :/ (not compatible with Ruby 1.9.1; changes needed)
This commit is contained in:
parent
203f8103e5
commit
7f98fd7d4b
2 changed files with 161 additions and 157 deletions
|
@ -45,7 +45,7 @@ class Uri
|
||||||
'userinfo' => 'username ( ":" password ) ?',
|
'userinfo' => 'username ( ":" password ) ?',
|
||||||
'domainlabel' => 'alphadigit ( ( alphadigit | "-" ) * alphadigit ) ?',
|
'domainlabel' => 'alphadigit ( ( alphadigit | "-" ) * alphadigit ) ?',
|
||||||
'hostname' => 'domainlabel ( "." | domainlabel ) *',
|
'hostname' => 'domainlabel ( "." | domainlabel ) *',
|
||||||
'ipv4' => 'ipv4digits ( "." ipv4digits ) {3}',
|
'ipv4' => 'ipv4digits ( "." ipv4digits ) {3,3}',
|
||||||
'request_uri' => '( path ? file ) ? ( "?" flags ) ?',
|
'request_uri' => '( path ? file ) ? ( "?" flags ) ?',
|
||||||
'serv' => 'host ( ":" port ? ) ?',
|
'serv' => 'host ( ":" port ? ) ?',
|
||||||
'pre' => '( ( scheme "://" ) ( auth "@" ) ? serv ) ?',
|
'pre' => '( ( scheme "://" ) ( auth "@" ) ? serv ) ?',
|
||||||
|
|
|
@ -17,8 +17,8 @@ class RegExpr::Segment
|
||||||
class <<self
|
class <<self
|
||||||
def optimize v
|
def optimize v
|
||||||
v= v.optimize
|
v= v.optimize
|
||||||
v= nil if v && v. empty?
|
v= nil if v and v.empty?
|
||||||
v= v. value[ 0] if v. instance_of?( RegExpr::Block) && v. hidden && v. size == 1
|
v= v.value[ 0] if RegExpr::Block === v and v.hidden and v.size == 1
|
||||||
v
|
v
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -55,13 +55,13 @@ class RegExpr::Block< RegExpr::Segment
|
||||||
def size() @value.size end
|
def size() @value.size end
|
||||||
|
|
||||||
def names
|
def names
|
||||||
names= @value. collect {|v| v. names }
|
names= @value.collect &:names
|
||||||
names.push( name) unless self.hidden?
|
names.push( name) unless self.hidden?
|
||||||
names.flatten.compact
|
names.flatten.compact
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize *val
|
def initialize *val
|
||||||
val= val[ 0] if val. size == 1 && val[ 0]. instance_of?( Array)
|
val= val[ 0] if ::Array === val[ 0] and val.size == 1
|
||||||
super val
|
super val
|
||||||
@hidden= true
|
@hidden= true
|
||||||
end
|
end
|
||||||
|
@ -71,14 +71,14 @@ class RegExpr::Block< RegExpr::Segment
|
||||||
|
|
||||||
@value.each do |v|
|
@value.each do |v|
|
||||||
v= self.class.optimize v
|
v= self.class.optimize v
|
||||||
if v.instance_of? RegExpr::Or
|
if RegExpr::Or === v
|
||||||
list.push []
|
list.push []
|
||||||
else list[ -1].push v
|
else list[ -1].push v
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
list.delete_if do |v|
|
list.delete_if do |v|
|
||||||
if v. size == 1 && ( v[ 0]. instance_of?( RegExpr::Chars) || v[ 0]. instance_of?( RegExpr::Char) )
|
if (RegExpr::Chars === v[ 0] and v.size == 1 ) or RegExpr::Char === v[ 0]
|
||||||
chars+= v[ 0]
|
chars+= v[ 0]
|
||||||
else false
|
else false
|
||||||
end
|
end
|
||||||
|
@ -88,16 +88,14 @@ class RegExpr::Block< RegExpr::Segment
|
||||||
values= []
|
values= []
|
||||||
list.each do |v|
|
list.each do |v|
|
||||||
values.push RegExpr::Or.new
|
values.push RegExpr::Or.new
|
||||||
values+= if v. size == 1 &&
|
values+= if v.size == 1 and RegExpr::Block === v[ 0] and v[ 0].hidden
|
||||||
v[ 0]. instance_of?( RegExpr::Block) &&
|
|
||||||
v[ 0]. hidden
|
|
||||||
v[ 0].value
|
v[ 0].value
|
||||||
else
|
else
|
||||||
v.collect do |w|
|
v.collect do |w|
|
||||||
if w. instance_of?( RegExpr::Block) && w. hidden
|
if RegExpr::Block === w and w.hidden
|
||||||
u= false
|
u= false
|
||||||
w.value.each do |i|
|
w.value.each do |i|
|
||||||
break unless u||= i. instance_of?( RegExpr::Or)
|
break unless u||= RegExpr::Or === i
|
||||||
end
|
end
|
||||||
u ? w : w.value
|
u ? w : w.value
|
||||||
else w
|
else w
|
||||||
|
@ -112,7 +110,7 @@ class RegExpr::Block< RegExpr::Segment
|
||||||
end
|
end
|
||||||
|
|
||||||
def to_r()
|
def to_r()
|
||||||
(@hidden ? '(?:%s)' : '(%s)')% @value. collect {|i| i.to_r }. join( '')
|
(@hidden ? '(?:%s)' : '(%s)')% @value.collect( &:to_r).join( '')
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -178,7 +176,7 @@ class RegExpr::Chars< RegExpr::Segment
|
||||||
def not?() @not end
|
def not?() @not end
|
||||||
def empty?() @chars. empty? end
|
def empty?() @chars. empty? end
|
||||||
def size() @chars. size end
|
def size() @chars. size end
|
||||||
def value=( val) @chars= (@not= val[ 0] == ?^) ? val[ 1.. -1] : val end
|
def value=( val) @chars= (@not= val[ 0] == ?^) ? val[ 1.. -1] : val ; val end
|
||||||
def value() (self. not? ? '^' : '')+ (@chars) end
|
def value() (self. not? ? '^' : '')+ (@chars) end
|
||||||
def not!() @not= !@not end
|
def not!() @not= !@not end
|
||||||
alias -@ not!
|
alias -@ not!
|
||||||
|
@ -186,12 +184,12 @@ class RegExpr::Chars< RegExpr::Segment
|
||||||
def split
|
def split
|
||||||
chars= []
|
chars= []
|
||||||
@chars. gsub( /\\-/) do |r|
|
@chars. gsub( /\\-/) do |r|
|
||||||
chars. push ?-
|
chars. push ?-.ord
|
||||||
nil
|
nil
|
||||||
end. gsub( /.-./) do |r|
|
end. gsub( /.-./) do |r|
|
||||||
chars+= ((r[ 0] .. r[ 2]). to_a)
|
chars+= (r[ 0].ord .. r[ 2].ord). to_a
|
||||||
nil
|
nil
|
||||||
end. each_byte do |c|
|
end. bytes. each do |c|
|
||||||
chars. push c
|
chars. push c
|
||||||
end
|
end
|
||||||
chars
|
chars
|
||||||
|
@ -199,16 +197,16 @@ class RegExpr::Chars< RegExpr::Segment
|
||||||
|
|
||||||
def optimize!
|
def optimize!
|
||||||
b2chr= lambda do |b|
|
b2chr= lambda do |b|
|
||||||
"-[]".include?( b.chr) ? '\%c'% b : b. chr
|
b = b.chr
|
||||||
|
"-[]".include?( b) ? '\%c'% b : b
|
||||||
end
|
end
|
||||||
chars= self. chars. bytes. sort. uniq
|
chars= self. split. sort. uniq
|
||||||
$stderr.puts chars.inspect
|
|
||||||
@chars= ''
|
@chars= ''
|
||||||
return self if chars.empty?
|
return self if chars.empty?
|
||||||
b= chars.shift
|
b= chars.shift
|
||||||
chars.each do |i|
|
chars.each do |i|
|
||||||
if b+1 == i
|
if b+1 == i
|
||||||
unless @chars[ -1] == ?- && @chars[-2] != ?\\
|
unless @chars[ -1] == ?- and @chars[-2] != ?\\
|
||||||
@chars+= b2chr.call( b)+ '-'
|
@chars+= b2chr.call( b)+ '-'
|
||||||
end
|
end
|
||||||
else @chars+= b2chr.call b
|
else @chars+= b2chr.call b
|
||||||
|
@ -221,9 +219,10 @@ class RegExpr::Chars< RegExpr::Segment
|
||||||
|
|
||||||
def optimize
|
def optimize
|
||||||
n= self.dup.optimize!
|
n= self.dup.optimize!
|
||||||
if (n. size == 1 || (n. size == 2 && n. value[ 0] == ?\\ )) && ! n. not?
|
if (n.size == 1 or (n.size == 2 and n.value[ 0] == ?\\ )) and not n.not?
|
||||||
RegExpr::Char. new n. chars[ -1]. chr
|
RegExpr::Char.new n.chars[ -1]
|
||||||
else n
|
else
|
||||||
|
n
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -254,7 +253,8 @@ class RegExpr::Repeat< RegExpr::Segment
|
||||||
|
|
||||||
def optimize
|
def optimize
|
||||||
super
|
super
|
||||||
min == 1 && max == 1 ? @value : self
|
r = (min == 1 and max == 1) ? @value : self
|
||||||
|
r
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize value, min= 1, max= min
|
def initialize value, min= 1, max= min
|
||||||
|
@ -264,6 +264,7 @@ class RegExpr::Repeat< RegExpr::Segment
|
||||||
|
|
||||||
def to_r
|
def to_r
|
||||||
t= '{%s,%s}'% [ @min||'', @max||'' ]
|
t= '{%s,%s}'% [ @min||'', @max||'' ]
|
||||||
|
return '' if '{0,0}' == t
|
||||||
t= Hash[ *%w<{,1} ? {0,1} ? {0,} * {,} * {1,} +>+ ['{1,1}', ''] ][ t]|| t
|
t= Hash[ *%w<{,1} ? {0,1} ? {0,} * {,} * {1,} +>+ ['{1,1}', ''] ][ t]|| t
|
||||||
@value.to_r+ t
|
@value.to_r+ t
|
||||||
end
|
end
|
||||||
|
@ -343,7 +344,8 @@ class RegExpr
|
||||||
end
|
end
|
||||||
|
|
||||||
def to_r exp= :main
|
def to_r exp= :main
|
||||||
r = self. to_re( exp). optimize
|
r = self.to_re( exp)
|
||||||
|
#r.optimize!
|
||||||
h, r = r.hidden?, r.to_r
|
h, r = r.hidden?, r.to_r
|
||||||
r = r[ 1...-1] unless h
|
r = r[ 1...-1] unless h
|
||||||
::Regexp.new r
|
::Regexp.new r
|
||||||
|
@ -351,7 +353,7 @@ class RegExpr
|
||||||
|
|
||||||
def to_re exp= :main
|
def to_re exp= :main
|
||||||
u= RegExpr::Block.new
|
u= RegExpr::Block.new
|
||||||
t, u. hidden= if exp. instance_of? Symbol
|
t, u.hidden= if Symbol === exp
|
||||||
u.name= exp.to_sym
|
u.name= exp.to_sym
|
||||||
if self[ exp]
|
if self[ exp]
|
||||||
[ self[ exp], false]
|
[ self[ exp], false]
|
||||||
|
@ -360,7 +362,7 @@ class RegExpr
|
||||||
else [ exp.to_s, true]
|
else [ exp.to_s, true]
|
||||||
end
|
end
|
||||||
|
|
||||||
until !t || t. empty?
|
until !t or t.empty?
|
||||||
v, t= self.to_r_next t
|
v, t= self.to_r_next t
|
||||||
case v
|
case v
|
||||||
when ')' then return u, t
|
when ')' then return u, t
|
||||||
|
@ -373,7 +375,7 @@ class RegExpr
|
||||||
|
|
||||||
def to_r_next exp
|
def to_r_next exp
|
||||||
exp.strip!
|
exp.strip!
|
||||||
/^/. match exp[ 1.. -1]
|
/^/ =~ exp[ 1.. -1]
|
||||||
t= case exp[ 0]
|
t= case exp[ 0]
|
||||||
when ?^ then return RegExpr::Begin.new, exp[ 1.. -1]
|
when ?^ then return RegExpr::Begin.new, exp[ 1.. -1]
|
||||||
when ?$ then return RegExpr::End.new, exp[ 1.. -1]
|
when ?$ then return RegExpr::End.new, exp[ 1.. -1]
|
||||||
|
@ -387,7 +389,7 @@ class RegExpr
|
||||||
when ?0.. ?9
|
when ?0.. ?9
|
||||||
exp= 'XX'+ exp[ 1.. -1]
|
exp= 'XX'+ exp[ 1.. -1]
|
||||||
10
|
10
|
||||||
else Kernel. raise ArgumentError, 'Unknown form "%s"'% exp
|
else raise ArgumentError, 'Unknown form "%s"'% exp
|
||||||
end
|
end
|
||||||
i= exp[ 2.. -1].to_i h
|
i= exp[ 2.. -1].to_i h
|
||||||
return RegExpr::Char.new( i.chr), exp[ (i.to_s( h). size+ 2).. -1]
|
return RegExpr::Char.new( i.chr), exp[ (i.to_s( h). size+ 2).. -1]
|
||||||
|
@ -406,7 +408,7 @@ class RegExpr
|
||||||
case exp
|
case exp
|
||||||
when %r<(\d+)..(\d+)> then RegExpr::Range.new $1.to_i, $2.to_i
|
when %r<(\d+)..(\d+)> then RegExpr::Range.new $1.to_i, $2.to_i
|
||||||
when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat.new '', *$1.split( ',')
|
when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat.new '', *$1.split( ',')
|
||||||
else Kernel. raise ArgumentError, 'Unknown form "%s"'% exp
|
else raise ArgumentError, 'Unknown form "%s"'% exp
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -430,7 +432,7 @@ class RegExpr
|
||||||
when %r<^([a-z_][a-z_0-9]*\b)>i then self.to_re $1.to_sym
|
when %r<^([a-z_][a-z_0-9]*\b)>i then self.to_re $1.to_sym
|
||||||
when %r<(\d+)..(\d+)> then RegExpr::Range.new $1.to_i, $2.to_i
|
when %r<(\d+)..(\d+)> then RegExpr::Range.new $1.to_i, $2.to_i
|
||||||
when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat.new '', *$1.split( ',')
|
when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat.new '', *$1.split( ',')
|
||||||
else Kernel. raise ArgumentError, 'Unknown form "%s"'% exp
|
else raise ArgumentError, 'Unknown form "%s"'% exp
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
[ t, $' ]
|
[ t, $' ]
|
||||||
|
@ -440,8 +442,8 @@ class RegExpr
|
||||||
exp= [ :main ] if exp.empty?
|
exp= [ :main ] if exp.empty?
|
||||||
exp.each do |e|
|
exp.each do |e|
|
||||||
re= self.to_re e
|
re= self.to_re e
|
||||||
names= re. names. collect {|n| '@%s'% n }. join ', '
|
names= re.names.collect('@%s'.method(:%)).join ', '
|
||||||
re= ::Regexp. new '^%s$'% re. optimize. to_r
|
re= ::Regexp.new '^%s$'% re.to_r
|
||||||
ev= <<-EOF
|
ev= <<-EOF
|
||||||
def #{e}= val
|
def #{e}= val
|
||||||
m= #{re.inspect}. match val
|
m= #{re.inspect}. match val
|
||||||
|
@ -454,5 +456,7 @@ class RegExpr
|
||||||
cl
|
cl
|
||||||
end
|
end
|
||||||
|
|
||||||
def match( m, exp= :main) self. to_r( exp). match m end
|
def match( m, exp= :main) to_r( exp).match m end
|
||||||
|
|
||||||
|
def =~( x) to_r =~ x end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Add table
Reference in a new issue