Compare commits

..

No commits in common. "master" and "v0.0.1" have entirely different histories.

6 changed files with 40 additions and 82 deletions

3
.gitignore vendored
View file

@ -1,3 +0,0 @@
*.swp
*.gemspec
pkg

View file

View file

@ -6,12 +6,12 @@ begin
Jeweler::Tasks.new do |gem| Jeweler::Tasks.new do |gem|
gem.name = "RegExpr" gem.name = "RegExpr"
gem.summary = %Q{Regular Expression Creator} gem.summary = %Q{Regular Expression Creator}
gem.description = %Q{Write Regular Expressions in a Hash and generates an optimized Regex} gem.description = %Q{Write Regular Expressions in a Hash and generats an optimized Regex}
gem.email = "Denis.Knauf@gmail.com" gem.email = "Denis.Knauf@gmail.com"
gem.homepage = "http://github.com/DenisKnauf/RegExpr" gem.homepage = "http://github.com/DenisKnauf/RegExpr"
gem.authors = ["Denis Knauf"] gem.authors = ["Denis Knauf"]
gem.files = %w[AUTHORS README.md VERSION lib/**/*.rb test/**/*.rb] gem.files = ["README.md", "VERSION", "lib/**/*.rb", "test/**/*.rb"]
gem.require_paths = %w[lib] gem.require_paths = ["lib"]
end end
Jeweler::GemcutterTasks.new Jeweler::GemcutterTasks.new
rescue LoadError rescue LoadError
@ -38,11 +38,11 @@ rescue LoadError
end end
end end
#task :test => :check_dependencies task :test => :check_dependencies
#task :default => :test task :default => :test
require 'rdoc/task' require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc| Rake::RDocTask.new do |rdoc|
if File.exist?('VERSION') if File.exist?('VERSION')
version = File.read('VERSION') version = File.read('VERSION')
@ -51,7 +51,7 @@ Rake::RDocTask.new do |rdoc|
end end
rdoc.rdoc_dir = 'rdoc' rdoc.rdoc_dir = 'rdoc'
rdoc.title = "RegExpr #{version}" rdoc.title = "sbdb #{version}"
rdoc.rdoc_files.include('README*') rdoc.rdoc_files.include('README*')
rdoc.rdoc_files.include('lib/**/*.rb') rdoc.rdoc_files.include('lib/**/*.rb')
end end

View file

@ -1 +1 @@
0.0.2 0.0.1

View file

@ -2,7 +2,6 @@
class RegExpr< Hash class RegExpr< Hash
end end
# any thing
class RegExpr::Segment class RegExpr::Segment
attr_accessor :value attr_accessor :value
def initialize( val) self.value= val end def initialize( val) self.value= val end
@ -23,14 +22,12 @@ class RegExpr::Segment
v v
end end
# can't have any segment as value
def deepest def deepest
self.class_eval do self.class_eval do
def names() [] end def names() [] end
end end
end end
# can't have any value
def novalue def novalue
self.class_eval do self.class_eval do
def initialize() end def initialize() end
@ -48,7 +45,6 @@ class RegExpr::Segment
end end
end end
# /(VALUE)/ or /(?:VALUE)/
class RegExpr::Block< RegExpr::Segment class RegExpr::Block< RegExpr::Segment
attr_accessor :name, :hidden attr_accessor :name, :hidden
def hidden?() @hidden end def hidden?() @hidden end
@ -58,10 +54,6 @@ class RegExpr::Block< RegExpr::Segment
def empty?() @value.empty? end def empty?() @value.empty? end
def size() @value.size end def size() @value.size end
def inspect
"#<#{self.class.name}: (#{hidden? ? '?:' : ''} #{value.map(&:inspect).join ' '} )>"
end
def names def names
names= @value.collect &:names names= @value.collect &:names
names.push( name) unless self.hidden? names.push( name) unless self.hidden?
@ -84,11 +76,9 @@ class RegExpr::Block< RegExpr::Segment
else list[ -1].push v else list[ -1].push v
end end
end end
#return self
# [A,C,A,C,Cs,As,C,Cs,C] => [A,A,As], chars = [C,C|Cs]++[C|Cs]++[C]
list.delete_if do |v| list.delete_if do |v|
if (1 == v.size and RegExpr::Chars === v[ 0]) or RegExpr::Char === v[ 0] if (RegExpr::Chars === v[ 0] and v.size == 1 ) or RegExpr::Char === v[ 0]
chars+= v[ 0] chars+= v[ 0]
else false else false
end end
@ -110,7 +100,7 @@ class RegExpr::Block< RegExpr::Segment
u ? w : w.value u ? w : w.value
else w else w
end end
end.flatten.compact end.flatten
end end
end end
values.push RegExpr::Or.new, chars if chars.size > 0 values.push RegExpr::Or.new, chars if chars.size > 0
@ -124,7 +114,6 @@ class RegExpr::Block< RegExpr::Segment
end end
end end
# /(?!VALUE)/
class RegExpr::Not< RegExpr::Segment class RegExpr::Not< RegExpr::Segment
deepest deepest
novalue novalue
@ -137,7 +126,6 @@ class RegExpr::Not< RegExpr::Segment
end end
end end
# eg: 1..99 => /[1-9]|[1-9][1-9]/
class RegExpr::Range< RegExpr::Segment class RegExpr::Range< RegExpr::Segment
novalue novalue
attr_accessor :v1, :v2 attr_accessor :v1, :v2
@ -155,9 +143,9 @@ class RegExpr::Range< RegExpr::Segment
bf= b == 0 ? 1.0 : b.to_f bf= b == 0 ? 1.0 : b.to_f
1.upto( b.to_s.length- 1) do |i| 1.upto( b.to_s.length- 1) do |i|
pot= 10** i pot= 10** i
num= (af/ pot).ceil * pot # next higher number with i zeros num= (af/ pot).ceil* pot # next higher number with i zeros
arr.insert i, num if num < @v2 arr.insert i, num if num < @v2
num= (bf/ pot).floor * pot # next lower number with i zeros num= (bf/ pot).floor* pot # next lower number with i zeros
arr.insert -i, num arr.insert -i, num
end end
arr.uniq! arr.uniq!
@ -165,13 +153,15 @@ class RegExpr::Range< RegExpr::Segment
result= RegExpr::Block.new result= RegExpr::Block.new
0.upto( arr. length- 2) do |i| 0.upto( arr. length- 2) do |i|
first, second= arr[ i].to_s, (arr[ i+ 1]- 1).to_s first= arr[ i].to_s
second= (arr[ i+ 1]- 1).to_s
result.push RegExpr::Or.new result.push RegExpr::Or.new
0.upto( first.length- 1) do |j| 0.upto( first.length- 1) do |j|
fst, sec= first[ j], second[ j] result.push( if first[ j] == second[ j]
result.push fst == sec ? RegExpr::Char.new first[ j].chr
RegExpr::Char.new( fst.chr) : else
RegExpr::Chars.new( '%c-%c'% [ fst, sec ]) RegExpr::Chars.new '%c-%c'% [ first[ j], second[ j] ]
end)
end end
end end
result. value. shift result. value. shift
@ -179,7 +169,6 @@ class RegExpr::Range< RegExpr::Segment
end end
end end
# /[CHARS]/ or /[^CHARS]/
class RegExpr::Chars< RegExpr::Segment class RegExpr::Chars< RegExpr::Segment
deepest deepest
attr_reader :chars, :not attr_reader :chars, :not
@ -192,10 +181,6 @@ class RegExpr::Chars< RegExpr::Segment
def not!() @not= !@not end def not!() @not= !@not end
alias -@ not! alias -@ not!
def inspect
"#<#{self.class.name}: [#{value}]>"
end
def split def split
chars= [] chars= []
@chars. gsub( /\\-/) do |r| @chars. gsub( /\\-/) do |r|
@ -256,9 +241,7 @@ class RegExpr::Chars< RegExpr::Segment
end end
end end
# /VALUE{MIN,MAX}/
class RegExpr::Repeat< RegExpr::Segment class RegExpr::Repeat< RegExpr::Segment
SimpleChar= Hash[ *%w<{,1} ? {0,1} ? {0,} * {,} * {1,} +> + ['{1,1}', ''] ]
attr_reader :min, :max attr_reader :min, :max
def minandmax x def minandmax x
@ -282,7 +265,8 @@ class RegExpr::Repeat< RegExpr::Segment
def to_r def to_r
t= '{%s,%s}'% [ @min||'', @max||'' ] t= '{%s,%s}'% [ @min||'', @max||'' ]
return '' if '{0,0}' == t return '' if '{0,0}' == t
@value.to_r+ (SimpleChar[ t] || t) t= Hash[ *%w<{,1} ? {0,1} ? {0,} * {,} * {1,} +>+ ['{1,1}', ''] ][ t]|| t
@value.to_r+ t
end end
end end
@ -359,16 +343,15 @@ class RegExpr
end end
end end
def to_r exp= nil def to_r exp= :main
r = self.to_re exp r = self.to_re( exp)
r.optimize! #r.optimize!
h, r = r.hidden?, r.to_r h, r = r.hidden?, r.to_r
r = r[ 1...-1] unless h r = r[ 1...-1] unless h
::Regexp.new r ::Regexp.new r
end end
def to_re exp= nil def to_re exp= :main
exp||= :main
u= RegExpr::Block.new u= RegExpr::Block.new
t, u.hidden= if Symbol === exp t, u.hidden= if Symbol === exp
u.name= exp.to_sym u.name= exp.to_sym
@ -378,11 +361,7 @@ class RegExpr
end end
else [ exp.to_s, true] else [ exp.to_s, true]
end end
parse t
end
def parse t, u= nil
u||= RegExpr::Block.new
until !t or t.empty? until !t or t.empty?
v, t= self.to_r_next t v, t= self.to_r_next t
case v case v
@ -415,8 +394,7 @@ class RegExpr
i= exp[ 2.. -1].to_i h i= exp[ 2.. -1].to_i h
return RegExpr::Char.new( i.chr), exp[ (i.to_s( h). size+ 2).. -1] return RegExpr::Char.new( i.chr), exp[ (i.to_s( h). size+ 2).. -1]
when ?. when ?. then return RegExpr::WildCard.new( '.'), exp[ 1.. -1]
return RegExpr::WildCard.new( '.'), exp[ 1.. -1]
when ?0 when ?0
case exp[ 1] case exp[ 1]
@ -428,17 +406,13 @@ class RegExpr
return '', $1.to_i( 2).to_s+ $' return '', $1.to_i( 2).to_s+ $'
else else
case exp case exp
when %r<(\d+)..(\d+)> when %r<(\d+)..(\d+)> then RegExpr::Range.new $1.to_i, $2.to_i
RegExpr::Range.new $1.to_i, $2.to_i when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat.new '', *$1.split( ',')
when %r<^(\d+,\d+|,\d+|\d+,?)> else raise ArgumentError, 'Unknown form "%s"'% exp
RegExpr::Repeat.new '', *$1.split( ',')
else
raise ArgumentError, 'Unknown form "%s"'% exp
end end
end end
when ?( when ?( then return self.to_re( exp[ 1.. -1])
return parse( exp[ 1.. -1])
when ?) then ')' when ?) then ')'
when ?| then RegExpr::Or.new when ?| then RegExpr::Or.new
@ -446,26 +420,19 @@ class RegExpr
when ?* then RegExpr::Repeat.new '', nil when ?* then RegExpr::Repeat.new '', nil
when ?? then RegExpr::Repeat.new '', 0, 1 when ?? then RegExpr::Repeat.new '', 0, 1
when ?" when ?" then RegExpr::Char.new %r<^"((?:[^"]|\\")*)">.match( exp)[ 1]
RegExpr::Char.new %r<^"((?:[^"]|\\")*)">.match( exp)[ 1] when ?[ then RegExpr::Chars.new %r<^\[((?:[^\]]|\\\])*[^\\]|)\]>.match( exp)[ 1]
when ?[ when ?/ then exp =~ %r<^/((?:[^/]|\\/)*)/(im?|mi)?>
RegExpr::Chars.new %r<^\[((?:[^\]]|\\\])*[^\\]|)\]>.match( exp)[ 1] RegExpr::Regexp.new ::Regexp.new( $1,
when ?/ ($2 =~ /i/ ? ::Regexp::IGNORECASE : 0)+
_, re, f= %r<^/((?:[^/]|\\/)*)/(im?|mi)?>.match( exp) ($2 =~ /m/ ? ::Regexp::MULTILINE : 0))
flg= $2=~ /i/ ? ::Regexp::IGNORECASE : 0
flg+= $2=~ /m/ ? ::Regexp::MULTILINE : 0
RegExpr::Regexp.new ::Regexp.new( re, flg)
else else
case exp case exp
when %r<^([a-z_][a-z_0-9]*\b)>i when %r<^([a-z_][a-z_0-9]*\b)>i then self.to_re $1.to_sym
self.to_re $1.to_sym when %r<(\d+)..(\d+)> then RegExpr::Range.new $1.to_i, $2.to_i
when %r<(\d+)..(\d+)> when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat.new '', *$1.split( ',')
RegExpr::Range.new $1.to_i, $2.to_i else raise ArgumentError, 'Unknown form "%s"'% exp
when %r<^(\d+,\d+|,\d+|\d+,?)>
RegExpr::Repeat.new '', *$1.split( ',')
else
raise ArgumentError, 'Unknown form "%s"'% exp
end end
end end
[ t, $' ] [ t, $' ]

View file

@ -1,6 +0,0 @@
class OptimizerTest < Test::Unit::TestCase
context "Optimizer" do
should "optimize many charblocks to one charblock" do
/[a-dt-z]/.to_s == RegExpr[ main: '[a-d] | [t-z]'].to_r.to_s
end
end