From c63268aec23f1b62f329e365d9e2fbe926593b62 Mon Sep 17 00:00:00 2001 From: Denis Knauf Date: Sat, 20 Mar 2010 23:21:54 +0100 Subject: [PATCH 1/7] little changes: metafiles --- AUTHOR => AUTHORS | 0 Rakefile | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename AUTHOR => AUTHORS (100%) diff --git a/AUTHOR b/AUTHORS similarity index 100% rename from AUTHOR rename to AUTHORS diff --git a/Rakefile b/Rakefile index 88f8cc7..f415ad1 100644 --- a/Rakefile +++ b/Rakefile @@ -10,8 +10,8 @@ begin gem.email = "Denis.Knauf@gmail.com" gem.homepage = "http://github.com/DenisKnauf/RegExpr" gem.authors = ["Denis Knauf"] - gem.files = ["README.md", "VERSION", "lib/**/*.rb", "test/**/*.rb"] - gem.require_paths = ["lib"] + gem.files = %w[AUTHORS README.md VERSION lib/**/*.rb test/**/*.rb] + gem.require_paths = %w[lib] end Jeweler::GemcutterTasks.new rescue LoadError From 86d65950fef8a1d10aab4c6621dcc3c76b9d8fc8 Mon Sep 17 00:00:00 2001 From: Denis Knauf Date: Sun, 9 Dec 2012 13:22:21 +0100 Subject: [PATCH 2/7] optimizer fixed --- lib/regexpr.rb | 97 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 32 deletions(-) diff --git a/lib/regexpr.rb b/lib/regexpr.rb index c513cf5..fad678e 100644 --- a/lib/regexpr.rb +++ b/lib/regexpr.rb @@ -2,6 +2,7 @@ class RegExpr< Hash end +# any thing class RegExpr::Segment attr_accessor :value def initialize( val) self.value= val end @@ -22,12 +23,14 @@ class RegExpr::Segment v end + # can't have any segment as value def deepest self.class_eval do def names() [] end end end + # can't have any value def novalue self.class_eval do def initialize() end @@ -45,6 +48,7 @@ class RegExpr::Segment end end +# /(VALUE)/ or /(?:VALUE)/ class RegExpr::Block< RegExpr::Segment attr_accessor :name, :hidden def hidden?() @hidden end @@ -54,6 +58,10 @@ class RegExpr::Block< RegExpr::Segment def empty?() @value.empty? end def size() @value.size end + def inspect + "#<#{self.class.name}: (#{hidden? ? '?:' : ''} #{value.map(&:inspect).join ' '} )>" + end + def names names= @value.collect &:names names.push( name) unless self.hidden? @@ -76,9 +84,11 @@ class RegExpr::Block< RegExpr::Segment else list[ -1].push v end end + #return self + # [A,C,A,C,Cs,As,C,Cs,C] => [A,A,As], chars = [C,C|Cs]++[C|Cs]++[C] list.delete_if do |v| - if (RegExpr::Chars === v[ 0] and v.size == 1 ) or RegExpr::Char === v[ 0] + if (1 == v.size and RegExpr::Chars === v[ 0]) or RegExpr::Char === v[ 0] chars+= v[ 0] else false end @@ -100,7 +110,7 @@ class RegExpr::Block< RegExpr::Segment u ? w : w.value else w end - end.flatten + end.flatten.compact end end values.push RegExpr::Or.new, chars if chars.size > 0 @@ -114,6 +124,7 @@ class RegExpr::Block< RegExpr::Segment end end +# /(?!VALUE)/ class RegExpr::Not< RegExpr::Segment deepest novalue @@ -126,6 +137,7 @@ class RegExpr::Not< RegExpr::Segment end end +# eg: 1..99 => /[1-9]|[1-9][1-9]/ class RegExpr::Range< RegExpr::Segment novalue attr_accessor :v1, :v2 @@ -143,9 +155,9 @@ class RegExpr::Range< RegExpr::Segment bf= b == 0 ? 1.0 : b.to_f 1.upto( b.to_s.length- 1) do |i| pot= 10** i - num= (af/ pot).ceil* pot # next higher number with i zeros + num= (af/ pot).ceil * pot # next higher number with i zeros arr.insert i, num if num < @v2 - num= (bf/ pot).floor* pot # next lower number with i zeros + num= (bf/ pot).floor * pot # next lower number with i zeros arr.insert -i, num end arr.uniq! @@ -153,15 +165,13 @@ class RegExpr::Range< RegExpr::Segment result= RegExpr::Block.new 0.upto( arr. length- 2) do |i| - first= arr[ i].to_s - second= (arr[ i+ 1]- 1).to_s + first, second= arr[ i].to_s, (arr[ i+ 1]- 1).to_s result.push RegExpr::Or.new 0.upto( first.length- 1) do |j| - result.push( if first[ j] == second[ j] - RegExpr::Char.new first[ j].chr - else - RegExpr::Chars.new '%c-%c'% [ first[ j], second[ j] ] - end) + fst, sec= first[ j], second[ j] + result.push fst == sec ? + RegExpr::Char.new( fst.chr) : + RegExpr::Chars.new( '%c-%c'% [ fst, sec ]) end end result. value. shift @@ -169,6 +179,7 @@ class RegExpr::Range< RegExpr::Segment end end +# /[CHARS]/ or /[^CHARS]/ class RegExpr::Chars< RegExpr::Segment deepest attr_reader :chars, :not @@ -181,6 +192,10 @@ class RegExpr::Chars< RegExpr::Segment def not!() @not= !@not end alias -@ not! + def inspect + "#<#{self.class.name}: [#{value}]>" + end + def split chars= [] @chars. gsub( /\\-/) do |r| @@ -241,7 +256,9 @@ class RegExpr::Chars< RegExpr::Segment end end +# /VALUE{MIN,MAX}/ class RegExpr::Repeat< RegExpr::Segment + SimpleChar= Hash[ *%w<{,1} ? {0,1} ? {0,} * {,} * {1,} +> + ['{1,1}', ''] ] attr_reader :min, :max def minandmax x @@ -265,8 +282,7 @@ class RegExpr::Repeat< RegExpr::Segment def to_r t= '{%s,%s}'% [ @min||'', @max||'' ] return '' if '{0,0}' == t - t= Hash[ *%w<{,1} ? {0,1} ? {0,} * {,} * {1,} +>+ ['{1,1}', ''] ][ t]|| t - @value.to_r+ t + @value.to_r+ (SimpleChar[ t] || t) end end @@ -343,15 +359,16 @@ class RegExpr end end - def to_r exp= :main - r = self.to_re( exp) - #r.optimize! + def to_r exp= nil + r = self.to_re exp + r.optimize! h, r = r.hidden?, r.to_r r = r[ 1...-1] unless h ::Regexp.new r end - def to_re exp= :main + def to_re exp= nil + exp||= :main u= RegExpr::Block.new t, u.hidden= if Symbol === exp u.name= exp.to_sym @@ -361,7 +378,11 @@ class RegExpr end else [ exp.to_s, true] end + parse t + end + def parse t, u= nil + u||= RegExpr::Block.new until !t or t.empty? v, t= self.to_r_next t case v @@ -394,7 +415,8 @@ class RegExpr i= exp[ 2.. -1].to_i h return RegExpr::Char.new( i.chr), exp[ (i.to_s( h). size+ 2).. -1] - when ?. then return RegExpr::WildCard.new( '.'), exp[ 1.. -1] + when ?. + return RegExpr::WildCard.new( '.'), exp[ 1.. -1] when ?0 case exp[ 1] @@ -406,13 +428,17 @@ class RegExpr return '', $1.to_i( 2).to_s+ $' else case exp - when %r<(\d+)..(\d+)> then RegExpr::Range.new $1.to_i, $2.to_i - when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat.new '', *$1.split( ',') - else raise ArgumentError, 'Unknown form "%s"'% exp + when %r<(\d+)..(\d+)> + RegExpr::Range.new $1.to_i, $2.to_i + when %r<^(\d+,\d+|,\d+|\d+,?)> + RegExpr::Repeat.new '', *$1.split( ',') + else + raise ArgumentError, 'Unknown form "%s"'% exp end end - when ?( then return self.to_re( exp[ 1.. -1]) + when ?( + return parse( exp[ 1.. -1]) when ?) then ')' when ?| then RegExpr::Or.new @@ -420,19 +446,26 @@ class RegExpr when ?* then RegExpr::Repeat.new '', nil when ?? then RegExpr::Repeat.new '', 0, 1 - when ?" then RegExpr::Char.new %r<^"((?:[^"]|\\")*)">.match( exp)[ 1] - when ?[ then RegExpr::Chars.new %r<^\[((?:[^\]]|\\\])*[^\\]|)\]>.match( exp)[ 1] - when ?/ then exp =~ %r<^/((?:[^/]|\\/)*)/(im?|mi)?> - RegExpr::Regexp.new ::Regexp.new( $1, - ($2 =~ /i/ ? ::Regexp::IGNORECASE : 0)+ - ($2 =~ /m/ ? ::Regexp::MULTILINE : 0)) + when ?" + RegExpr::Char.new %r<^"((?:[^"]|\\")*)">.match( exp)[ 1] + when ?[ + RegExpr::Chars.new %r<^\[((?:[^\]]|\\\])*[^\\]|)\]>.match( exp)[ 1] + when ?/ + _, re, f= %r<^/((?:[^/]|\\/)*)/(im?|mi)?>.match( exp) + flg= $2=~ /i/ ? ::Regexp::IGNORECASE : 0 + flg+= $2=~ /m/ ? ::Regexp::MULTILINE : 0 + RegExpr::Regexp.new ::Regexp.new( re, flg) else case exp - when %r<^([a-z_][a-z_0-9]*\b)>i then self.to_re $1.to_sym - when %r<(\d+)..(\d+)> then RegExpr::Range.new $1.to_i, $2.to_i - when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat.new '', *$1.split( ',') - else raise ArgumentError, 'Unknown form "%s"'% exp + when %r<^([a-z_][a-z_0-9]*\b)>i + self.to_re $1.to_sym + when %r<(\d+)..(\d+)> + RegExpr::Range.new $1.to_i, $2.to_i + when %r<^(\d+,\d+|,\d+|\d+,?)> + RegExpr::Repeat.new '', *$1.split( ',') + else + raise ArgumentError, 'Unknown form "%s"'% exp end end [ t, $' ] From bc01c2c8b80033d77db88badd9facf14b19bbec7 Mon Sep 17 00:00:00 2001 From: Denis Knauf Date: Sun, 9 Dec 2012 13:24:52 +0100 Subject: [PATCH 3/7] rdoc-fix, 0.0.2 --- Rakefile | 8 ++++---- VERSION | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Rakefile b/Rakefile index f415ad1..8c20dc8 100644 --- a/Rakefile +++ b/Rakefile @@ -38,11 +38,11 @@ rescue LoadError end end -task :test => :check_dependencies +#task :test => :check_dependencies -task :default => :test +#task :default => :test -require 'rake/rdoctask' +require 'rdoc/task' Rake::RDocTask.new do |rdoc| if File.exist?('VERSION') version = File.read('VERSION') @@ -51,7 +51,7 @@ Rake::RDocTask.new do |rdoc| end rdoc.rdoc_dir = 'rdoc' - rdoc.title = "sbdb #{version}" + rdoc.title = "RegExpr #{version}" rdoc.rdoc_files.include('README*') rdoc.rdoc_files.include('lib/**/*.rb') end diff --git a/VERSION b/VERSION index 8acdd82..4e379d2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.0.1 +0.0.2 From ea746a749aa82ed418c3fd2586479bad8934b182 Mon Sep 17 00:00:00 2001 From: Denis Knauf Date: Sun, 9 Dec 2012 13:27:06 +0100 Subject: [PATCH 4/7] .gitignore, first test added. --- .gitignore | 1 + tests/optimizer.rb | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 .gitignore create mode 100644 tests/optimizer.rb diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1377554 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.swp diff --git a/tests/optimizer.rb b/tests/optimizer.rb new file mode 100644 index 0000000..d0db55c --- /dev/null +++ b/tests/optimizer.rb @@ -0,0 +1,6 @@ +class OptimizerTest < Test::Unit::TestCase + context "Optimizer" do + should "optimize many charblocks to one charblock" do + /[a-dt-z]/.to_s == RegExpr[ main: '[a-d] | [t-z]'].to_r.to_s + end +end From 407e79c6ce725de5efbdbab372ea57cc38917974 Mon Sep 17 00:00:00 2001 From: Denis Knauf Date: Sun, 9 Dec 2012 13:28:09 +0100 Subject: [PATCH 5/7] .gitignore: *.gemspec --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 1377554..c5dba7c 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ *.swp +*.gemspec From 42032e26cca05e91c2916feaea9a5b427b3379a4 Mon Sep 17 00:00:00 2001 From: Denis Knauf Date: Sun, 9 Dec 2012 13:29:07 +0100 Subject: [PATCH 6/7] .gitignore: pkg --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index c5dba7c..4c54163 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.swp *.gemspec +pkg From ded059569cba6133216b6ca2cffa5fe94ab16a95 Mon Sep 17 00:00:00 2001 From: MBean Date: Fri, 29 Mar 2013 14:37:50 -0600 Subject: [PATCH 7/7] Fixed a typo. --- Rakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Rakefile b/Rakefile index 8c20dc8..f8083d5 100644 --- a/Rakefile +++ b/Rakefile @@ -6,7 +6,7 @@ begin Jeweler::Tasks.new do |gem| gem.name = "RegExpr" gem.summary = %Q{Regular Expression Creator} - gem.description = %Q{Write Regular Expressions in a Hash and generats an optimized Regex} + gem.description = %Q{Write Regular Expressions in a Hash and generates an optimized Regex} gem.email = "Denis.Knauf@gmail.com" gem.homepage = "http://github.com/DenisKnauf/RegExpr" gem.authors = ["Denis Knauf"]