diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 4c54163..0000000 --- a/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -*.swp -*.gemspec -pkg diff --git a/AUTHORS b/AUTHOR similarity index 100% rename from AUTHORS rename to AUTHOR diff --git a/Rakefile b/Rakefile index f8083d5..88f8cc7 100644 --- a/Rakefile +++ b/Rakefile @@ -6,12 +6,12 @@ begin Jeweler::Tasks.new do |gem| gem.name = "RegExpr" gem.summary = %Q{Regular Expression Creator} - gem.description = %Q{Write Regular Expressions in a Hash and generates an optimized Regex} + gem.description = %Q{Write Regular Expressions in a Hash and generats an optimized Regex} gem.email = "Denis.Knauf@gmail.com" gem.homepage = "http://github.com/DenisKnauf/RegExpr" gem.authors = ["Denis Knauf"] - gem.files = %w[AUTHORS README.md VERSION lib/**/*.rb test/**/*.rb] - gem.require_paths = %w[lib] + gem.files = ["README.md", "VERSION", "lib/**/*.rb", "test/**/*.rb"] + gem.require_paths = ["lib"] end Jeweler::GemcutterTasks.new rescue LoadError @@ -38,11 +38,11 @@ rescue LoadError end end -#task :test => :check_dependencies +task :test => :check_dependencies -#task :default => :test +task :default => :test -require 'rdoc/task' +require 'rake/rdoctask' Rake::RDocTask.new do |rdoc| if File.exist?('VERSION') version = File.read('VERSION') @@ -51,7 +51,7 @@ Rake::RDocTask.new do |rdoc| end rdoc.rdoc_dir = 'rdoc' - rdoc.title = "RegExpr #{version}" + rdoc.title = "sbdb #{version}" rdoc.rdoc_files.include('README*') rdoc.rdoc_files.include('lib/**/*.rb') end diff --git a/VERSION b/VERSION index 4e379d2..8acdd82 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.0.2 +0.0.1 diff --git a/lib/regexpr.rb b/lib/regexpr.rb index fad678e..c513cf5 100644 --- a/lib/regexpr.rb +++ b/lib/regexpr.rb @@ -2,7 +2,6 @@ class RegExpr< Hash end -# any thing class RegExpr::Segment attr_accessor :value def initialize( val) self.value= val end @@ -23,14 +22,12 @@ class RegExpr::Segment v end - # can't have any segment as value def deepest self.class_eval do def names() [] end end end - # can't have any value def novalue self.class_eval do def initialize() end @@ -48,7 +45,6 @@ class RegExpr::Segment end end -# /(VALUE)/ or /(?:VALUE)/ class RegExpr::Block< RegExpr::Segment attr_accessor :name, :hidden def hidden?() @hidden end @@ -58,10 +54,6 @@ class RegExpr::Block< RegExpr::Segment def empty?() @value.empty? end def size() @value.size end - def inspect - "#<#{self.class.name}: (#{hidden? ? '?:' : ''} #{value.map(&:inspect).join ' '} )>" - end - def names names= @value.collect &:names names.push( name) unless self.hidden? @@ -84,11 +76,9 @@ class RegExpr::Block< RegExpr::Segment else list[ -1].push v end end - #return self - # [A,C,A,C,Cs,As,C,Cs,C] => [A,A,As], chars = [C,C|Cs]++[C|Cs]++[C] list.delete_if do |v| - if (1 == v.size and RegExpr::Chars === v[ 0]) or RegExpr::Char === v[ 0] + if (RegExpr::Chars === v[ 0] and v.size == 1 ) or RegExpr::Char === v[ 0] chars+= v[ 0] else false end @@ -110,7 +100,7 @@ class RegExpr::Block< RegExpr::Segment u ? w : w.value else w end - end.flatten.compact + end.flatten end end values.push RegExpr::Or.new, chars if chars.size > 0 @@ -124,7 +114,6 @@ class RegExpr::Block< RegExpr::Segment end end -# /(?!VALUE)/ class RegExpr::Not< RegExpr::Segment deepest novalue @@ -137,7 +126,6 @@ class RegExpr::Not< RegExpr::Segment end end -# eg: 1..99 => /[1-9]|[1-9][1-9]/ class RegExpr::Range< RegExpr::Segment novalue attr_accessor :v1, :v2 @@ -155,9 +143,9 @@ class RegExpr::Range< RegExpr::Segment bf= b == 0 ? 1.0 : b.to_f 1.upto( b.to_s.length- 1) do |i| pot= 10** i - num= (af/ pot).ceil * pot # next higher number with i zeros + num= (af/ pot).ceil* pot # next higher number with i zeros arr.insert i, num if num < @v2 - num= (bf/ pot).floor * pot # next lower number with i zeros + num= (bf/ pot).floor* pot # next lower number with i zeros arr.insert -i, num end arr.uniq! @@ -165,13 +153,15 @@ class RegExpr::Range< RegExpr::Segment result= RegExpr::Block.new 0.upto( arr. length- 2) do |i| - first, second= arr[ i].to_s, (arr[ i+ 1]- 1).to_s + first= arr[ i].to_s + second= (arr[ i+ 1]- 1).to_s result.push RegExpr::Or.new 0.upto( first.length- 1) do |j| - fst, sec= first[ j], second[ j] - result.push fst == sec ? - RegExpr::Char.new( fst.chr) : - RegExpr::Chars.new( '%c-%c'% [ fst, sec ]) + result.push( if first[ j] == second[ j] + RegExpr::Char.new first[ j].chr + else + RegExpr::Chars.new '%c-%c'% [ first[ j], second[ j] ] + end) end end result. value. shift @@ -179,7 +169,6 @@ class RegExpr::Range< RegExpr::Segment end end -# /[CHARS]/ or /[^CHARS]/ class RegExpr::Chars< RegExpr::Segment deepest attr_reader :chars, :not @@ -192,10 +181,6 @@ class RegExpr::Chars< RegExpr::Segment def not!() @not= !@not end alias -@ not! - def inspect - "#<#{self.class.name}: [#{value}]>" - end - def split chars= [] @chars. gsub( /\\-/) do |r| @@ -256,9 +241,7 @@ class RegExpr::Chars< RegExpr::Segment end end -# /VALUE{MIN,MAX}/ class RegExpr::Repeat< RegExpr::Segment - SimpleChar= Hash[ *%w<{,1} ? {0,1} ? {0,} * {,} * {1,} +> + ['{1,1}', ''] ] attr_reader :min, :max def minandmax x @@ -282,7 +265,8 @@ class RegExpr::Repeat< RegExpr::Segment def to_r t= '{%s,%s}'% [ @min||'', @max||'' ] return '' if '{0,0}' == t - @value.to_r+ (SimpleChar[ t] || t) + t= Hash[ *%w<{,1} ? {0,1} ? {0,} * {,} * {1,} +>+ ['{1,1}', ''] ][ t]|| t + @value.to_r+ t end end @@ -359,16 +343,15 @@ class RegExpr end end - def to_r exp= nil - r = self.to_re exp - r.optimize! + def to_r exp= :main + r = self.to_re( exp) + #r.optimize! h, r = r.hidden?, r.to_r r = r[ 1...-1] unless h ::Regexp.new r end - def to_re exp= nil - exp||= :main + def to_re exp= :main u= RegExpr::Block.new t, u.hidden= if Symbol === exp u.name= exp.to_sym @@ -378,11 +361,7 @@ class RegExpr end else [ exp.to_s, true] end - parse t - end - def parse t, u= nil - u||= RegExpr::Block.new until !t or t.empty? v, t= self.to_r_next t case v @@ -415,8 +394,7 @@ class RegExpr i= exp[ 2.. -1].to_i h return RegExpr::Char.new( i.chr), exp[ (i.to_s( h). size+ 2).. -1] - when ?. - return RegExpr::WildCard.new( '.'), exp[ 1.. -1] + when ?. then return RegExpr::WildCard.new( '.'), exp[ 1.. -1] when ?0 case exp[ 1] @@ -428,17 +406,13 @@ class RegExpr return '', $1.to_i( 2).to_s+ $' else case exp - when %r<(\d+)..(\d+)> - RegExpr::Range.new $1.to_i, $2.to_i - when %r<^(\d+,\d+|,\d+|\d+,?)> - RegExpr::Repeat.new '', *$1.split( ',') - else - raise ArgumentError, 'Unknown form "%s"'% exp + when %r<(\d+)..(\d+)> then RegExpr::Range.new $1.to_i, $2.to_i + when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat.new '', *$1.split( ',') + else raise ArgumentError, 'Unknown form "%s"'% exp end end - when ?( - return parse( exp[ 1.. -1]) + when ?( then return self.to_re( exp[ 1.. -1]) when ?) then ')' when ?| then RegExpr::Or.new @@ -446,26 +420,19 @@ class RegExpr when ?* then RegExpr::Repeat.new '', nil when ?? then RegExpr::Repeat.new '', 0, 1 - when ?" - RegExpr::Char.new %r<^"((?:[^"]|\\")*)">.match( exp)[ 1] - when ?[ - RegExpr::Chars.new %r<^\[((?:[^\]]|\\\])*[^\\]|)\]>.match( exp)[ 1] - when ?/ - _, re, f= %r<^/((?:[^/]|\\/)*)/(im?|mi)?>.match( exp) - flg= $2=~ /i/ ? ::Regexp::IGNORECASE : 0 - flg+= $2=~ /m/ ? ::Regexp::MULTILINE : 0 - RegExpr::Regexp.new ::Regexp.new( re, flg) + when ?" then RegExpr::Char.new %r<^"((?:[^"]|\\")*)">.match( exp)[ 1] + when ?[ then RegExpr::Chars.new %r<^\[((?:[^\]]|\\\])*[^\\]|)\]>.match( exp)[ 1] + when ?/ then exp =~ %r<^/((?:[^/]|\\/)*)/(im?|mi)?> + RegExpr::Regexp.new ::Regexp.new( $1, + ($2 =~ /i/ ? ::Regexp::IGNORECASE : 0)+ + ($2 =~ /m/ ? ::Regexp::MULTILINE : 0)) else case exp - when %r<^([a-z_][a-z_0-9]*\b)>i - self.to_re $1.to_sym - when %r<(\d+)..(\d+)> - RegExpr::Range.new $1.to_i, $2.to_i - when %r<^(\d+,\d+|,\d+|\d+,?)> - RegExpr::Repeat.new '', *$1.split( ',') - else - raise ArgumentError, 'Unknown form "%s"'% exp + when %r<^([a-z_][a-z_0-9]*\b)>i then self.to_re $1.to_sym + when %r<(\d+)..(\d+)> then RegExpr::Range.new $1.to_i, $2.to_i + when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat.new '', *$1.split( ',') + else raise ArgumentError, 'Unknown form "%s"'% exp end end [ t, $' ] diff --git a/tests/optimizer.rb b/tests/optimizer.rb deleted file mode 100644 index d0db55c..0000000 --- a/tests/optimizer.rb +++ /dev/null @@ -1,6 +0,0 @@ -class OptimizerTest < Test::Unit::TestCase - context "Optimizer" do - should "optimize many charblocks to one charblock" do - /[a-dt-z]/.to_s == RegExpr[ main: '[a-d] | [t-z]'].to_r.to_s - end -end