# RegExpr -- a Hash subclass whose entries map a name to a small grammar
# expression string (e.g. 'digit' => '[0-9]', 'alpha' => 'loalpha | hialpha').
# RegExpr#to_re parses one entry (default :main) through #parse / #to_r_next
# into a tree of segment objects; RegExpr#to_r calls optimize! on that tree and
# wraps the resulting text in a ::Regexp. #match and #=~ delegate to #to_r.
# RegExpr#def class_eval's generated `<name>=` setters that match the value
# against '^...$' and raise ArgumentError when it does not match.
#
# Segment classes visible in this text (all subclass RegExpr::Segment):
#   Not      -- to_r yields '(?!VALUE)', or negates a Chars value
#   Range    -- numeric range expanded into alternatives of digit classes
#   Chars    -- '[CHARS]' / '[^CHARS]'; #split expands to byte codes,
#               #optimize! re-compresses consecutive codes into 'a-z' runs
#   Repeat   -- 'VALUE{MIN,MAX}', shortened via SimpleChar to ? * + where possible
#   Char     -- literal text, emitted through ::Regexp.quote
#   Regexp, Or ('|'), End ('$'), Begin ('^'), WildCard -- emitted as-is
#
# NOTE(review): this text is damaged and will not parse as-is.
#  * Line breaks have been collapsed: every inline `#` comment now runs to the
#    end of its physical line and swallows the code that followed it
#    (e.g. "# any thing class RegExpr::Segment ..."). Re-split before use.
#  * Two spans appear to have been cut out: "class <\" end def names" (the
#    Segment `class <<self` body and the start of RegExpr::Block seem to be
#    gone) and "class RegExpr class < '[a-z]'" (the opening of the STDEXP
#    table). As a result the class macros `deepest`, `novalue`, `nooptimize`,
#    the class-level `optimize`, the constant STDEXP, and Block's `name`,
#    `hidden?`, `push`, `pop` are used below but not defined in the visible
#    text -- restore them from the upstream source, do not guess.
#
# NOTE(review): spots worth confirming once the file is restored:
#  * #to_re builds a Block `u` (name / hidden flag) but then calls `parse t`
#    without passing it -- presumably `parse t, u` was intended; confirm.
#  * The `0o` literal pattern %r<^0o([0-8]+)> admits the digit 8.
#  * The range pattern %r<(\d+)..(\d+)> has unescaped dots and, unlike the
#    repeat pattern next to it, no leading `^` anchor.
#  * Range#value orders the bounds into a/b but tests `num < @v2`, not `b`.
#  * In the `/.../` branch, `_, re, f= ...match( exp)` relies on multiple
#    assignment from a MatchData; `f` is unused and the flags are read from $2.
#    Verify `re` actually receives the first capture.
class RegExpr< Hash end # any thing class RegExpr::Segment attr_accessor :value def initialize( val) self.value= val end def to_r() self.value.to_s end def empty?() self.value.nil? end def names() @value.names.flatten.compact end def optimize self.value= self.class.optimize self.value self end class <" end def names names= @value.collect &:names names.push( name) unless self.hidden? names.flatten.compact end def initialize *val val= val[ 0] if ::Array === val[ 0] and val.size == 1 super val @hidden= true end def optimize! list, chars= [[]], RegExpr::Chars.new( '') @value.each do |v| v= self.class.optimize v if RegExpr::Or === v list.push [] else list[ -1].push v end end #return self # [A,C,A,C,Cs,As,C,Cs,C] => [A,A,As], chars = [C,C|Cs]++[C|Cs]++[C] list.delete_if do |v| if (1 == v.size and RegExpr::Chars === v[ 0]) or RegExpr::Char === v[ 0] chars+= v[ 0] else false end end chars= chars.optimize values= [] list.each do |v| values.push RegExpr::Or.new values+= if v.size == 1 and RegExpr::Block === v[ 0] and v[ 0].hidden v[ 0].value else v.collect do |w| if RegExpr::Block === w and w.hidden u= false w.value.each do |i| break unless u||= RegExpr::Or === i end u ? w : w.value else w end end.flatten.compact end end values.push RegExpr::Or.new, chars if chars.size > 0 values.shift @value= values self end def to_r() (@hidden ? '(?:%s)' : '(%s)')% @value.collect( &:to_r).join( '') end end # /(?!VALUE)/ class RegExpr::Not< RegExpr::Segment deepest novalue def to_r if @value.instance_of? RegExpr::Chars @value.not! @value.to_s else '(?!%s)'% @value end end end # eg: 1..99 => /[1-9]|[1-9][1-9]/ class RegExpr::Range< RegExpr::Segment novalue attr_accessor :v1, :v2 def names() [] end def optimize() self.value.optimize end def to_r() self.optimize.to_r end def initialize( v1, v2) @v1, @v2= v1, v2 end # algo stolen from thomas leitner def value a, b= @v1< @v2 ? [ @v1, @v2] : [ @v2, @v1] arr= Array[ a] af= a == 0 ? 1.0 : a.to_f bf= b == 0 ? 
1.0 : b.to_f 1.upto( b.to_s.length- 1) do |i| pot= 10** i num= (af/ pot).ceil * pot # next higher number with i zeros arr.insert i, num if num < @v2 num= (bf/ pot).floor * pot # next lower number with i zeros arr.insert -i, num end arr.uniq! arr.push b+ 1 # +1 -> to handle it in the same way as the other elements result= RegExpr::Block.new 0.upto( arr. length- 2) do |i| first, second= arr[ i].to_s, (arr[ i+ 1]- 1).to_s result.push RegExpr::Or.new 0.upto( first.length- 1) do |j| fst, sec= first[ j], second[ j] result.push fst == sec ? RegExpr::Char.new( fst.chr) : RegExpr::Chars.new( '%c-%c'% [ fst, sec ]) end end result. value. shift result end end # /[CHARS]/ or /[^CHARS]/ class RegExpr::Chars< RegExpr::Segment deepest attr_reader :chars, :not def to_r() '[%s]'% self. value end def not?() @not end def empty?() @chars. empty? end def size() @chars. size end def value=( val) @chars= (@not= val[ 0] == ?^) ? val[ 1.. -1] : val ; val end def value() (self. not? ? '^' : '')+ (@chars) end def not!() @not= !@not end alias -@ not! def inspect "#<#{self.class.name}: [#{value}]>" end def split chars= [] @chars. gsub( /\\-/) do |r| chars. push ?-.ord nil end. gsub( /.-./) do |r| chars+= (r[ 0].ord .. r[ 2].ord). to_a nil end. bytes. each do |c| chars. push c end chars end def optimize! b2chr= lambda do |b| b = b.chr "-[]".include?( b) ? '\%c'% b : b end chars= self. split. sort. uniq @chars= '' return self if chars.empty? b= chars.shift chars.each do |i| if b+1 == i unless @chars[ -1] == ?- and @chars[-2] != ?\\ @chars+= b2chr.call( b)+ '-' end else @chars+= b2chr.call b end b= i end @chars+= b2chr.call b self end def optimize n= self.dup.optimize! if (n.size == 1 or (n.size == 2 and n.value[ 0] == ?\\ )) and not n.not? RegExpr::Char.new n.chars[ -1] else n end end def + b chars= self.not? ? '^' : '' chars+= if b.instance_of? RegExpr::Char self.split.push b.value[ 0] elsif self.not? == b.not? self.split+ b.split elsif self.not? 
(0..255).to_a- self.split+ b.split else (0..255).to_a- b.split+ self.split end.compact.uniq.collect {|i| i.chr }.join( '') self.class.new chars end end # /VALUE{MIN,MAX}/ class RegExpr::Repeat< RegExpr::Segment SimpleChar= Hash[ *%w<{,1} ? {0,1} ? {0,} * {,} * {1,} +> + ['{1,1}', ''] ] attr_reader :min, :max def minandmax x case x when nil, '' then nil else x.to_i end end def optimize super r = (min == 1 and max == 1) ? @value : self r end def initialize value, min= 1, max= min super value @min, @max= self.minandmax( min), self.minandmax( max) end def to_r t= '{%s,%s}'% [ @min||'', @max||'' ] return '' if '{0,0}' == t @value.to_r+ (SimpleChar[ t] || t) end end class RegExpr::Char< RegExpr::Segment deepest nooptimize def to_r() ::Regexp.quote @value end def size() 1 end def self.new x x= x.split( '').collect {|i| super i } x.size == 1 ? x[ 0] : RegExpr::Block.new( x) end end class RegExpr::Regexp< RegExpr::Segment deepest nooptimize def to_r() @value. to_s end end class RegExpr::Or< RegExpr::Segment deepest novalue def to_r() '|' end def to_s() '|' end end class RegExpr::End< RegExpr::Segment deepest novalue def to_r() '$' end def to_s() '$' end end class RegExpr::Begin< RegExpr::Segment deepest novalue def to_r() '^' end def to_s() '^' end end class RegExpr::WildCard< RegExpr::Segment deepest nooptimize def to_r() @value end def to_s() @value end end class RegExpr class < '[a-z]', 'hialpha' => '[A-Z]', 'alpha' => 'loalpha | hialpha', 'digit' => '[0-9]', 'alphadigit' => 'alpha | digit', 'hexdigit' => 'digit | [a-fA-F]', 'octdigit' => '[0-7]', 'bindigit' => '[01]', 'space' => '[ \t\n\r\v]' ] def [] *vals ret= super *vals STDEXP.each {|k, v| ret[ k]||= v } ret end def new *vals ret= super *vals STDEXP.each {|k, v| ret[ k]||= v } ret end end def to_r exp= nil r = self.to_re exp r.optimize! 
h, r = r.hidden?, r.to_r r = r[ 1...-1] unless h ::Regexp.new r end def to_re exp= nil exp||= :main u= RegExpr::Block.new t, u.hidden= if Symbol === exp u.name= exp.to_sym if self[ exp] [ self[ exp], false] else [ self[ exp.to_s], true] end else [ exp.to_s, true] end parse t end def parse t, u= nil u||= RegExpr::Block.new until !t or t.empty? v, t= self.to_r_next t case v when ')' then return u, t when RegExpr::Repeat then v.value= u.pop end u.push v end u end def to_r_next exp exp.strip! /^/ =~ exp[ 1.. -1] t= case exp[ 0] when ?^ then return RegExpr::Begin.new, exp[ 1.. -1] when ?$ then return RegExpr::End.new, exp[ 1.. -1] when ?\\ h= case exp[ 1] when ?D, ?S, ?W, ?a, ?d.. ?f, ?n, ?r.. ?t, ?v, ?w return RegExpr::WildCard.new( '\%c'% exp[ 1]), exp[ 2.. -1] when ?x then 16 when ?o then 8 when ?b then 2 when ?0.. ?9 exp= 'XX'+ exp[ 1.. -1] 10 else raise ArgumentError, 'Unknown form "%s"'% exp end i= exp[ 2.. -1].to_i h return RegExpr::Char.new( i.chr), exp[ (i.to_s( h). size+ 2).. -1] when ?. return RegExpr::WildCard.new( '.'), exp[ 1.. -1] when ?0 case exp[ 1] when ?x then %r<^0x([0-9a-f]+)>i.match exp return '', $1.to_i( 16).to_s+ $' when ?o then %r<^0o([0-8]+)>.match exp return '', $1.to_i( 8).to_s+ $' when ?b then %r<^0b([01]+)>.match exp return '', $1.to_i( 2).to_s+ $' else case exp when %r<(\d+)..(\d+)> RegExpr::Range.new $1.to_i, $2.to_i when %r<^(\d+,\d+|,\d+|\d+,?)> RegExpr::Repeat.new '', *$1.split( ',') else raise ArgumentError, 'Unknown form "%s"'% exp end end when ?( return parse( exp[ 1.. -1]) when ?) then ')' when ?| then RegExpr::Or.new when ?+ then RegExpr::Repeat.new '', 1, nil when ?* then RegExpr::Repeat.new '', nil when ?? then RegExpr::Repeat.new '', 0, 1 when ?" RegExpr::Char.new %r<^"((?:[^"]|\\")*)">.match( exp)[ 1] when ?[ RegExpr::Chars.new %r<^\[((?:[^\]]|\\\])*[^\\]|)\]>.match( exp)[ 1] when ?/ _, re, f= %r<^/((?:[^/]|\\/)*)/(im?|mi)?>.match( exp) flg= $2=~ /i/ ? ::Regexp::IGNORECASE : 0 flg+= $2=~ /m/ ? 
::Regexp::MULTILINE : 0 RegExpr::Regexp.new ::Regexp.new( re, flg) else case exp when %r<^([a-z_][a-z_0-9]*\b)>i self.to_re $1.to_sym when %r<(\d+)..(\d+)> RegExpr::Range.new $1.to_i, $2.to_i when %r<^(\d+,\d+|,\d+|\d+,?)> RegExpr::Repeat.new '', *$1.split( ',') else raise ArgumentError, 'Unknown form "%s"'% exp end end [ t, $' ] end def def cl= Class.new, *exp exp= [ :main ] if exp.empty? exp.each do |e| re= self.to_re e names= re.names.collect('@%s'.method(:%)).join ', ' re= ::Regexp.new '^%s$'% re.to_r ev= <<-EOF def #{e}= val m= #{re.inspect}. match val raise ArgumentError, 'Unallowed Chars! (%s =~ #{re.inspect})'% val. inspect unless m #{names}= *m[ 1.. -1] end EOF cl.class_eval ev end cl end def match( m, exp= :main) to_r( exp).match m end def =~( x) to_r =~ x end end