Compare commits
8 commits
Author | SHA1 | Date | |
---|---|---|---|
|
6f4d7b1b2b | ||
|
ded059569c | ||
|
42032e26cc | ||
|
407e79c6ce | ||
|
ea746a749a | ||
|
bc01c2c8b8 | ||
|
86d65950fe | ||
|
c63268aec2 |
6 changed files with 82 additions and 40 deletions
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
*.swp
|
||||||
|
*.gemspec
|
||||||
|
pkg
|
14
Rakefile
14
Rakefile
|
@ -6,12 +6,12 @@ begin
|
||||||
Jeweler::Tasks.new do |gem|
|
Jeweler::Tasks.new do |gem|
|
||||||
gem.name = "RegExpr"
|
gem.name = "RegExpr"
|
||||||
gem.summary = %Q{Regular Expression Creator}
|
gem.summary = %Q{Regular Expression Creator}
|
||||||
gem.description = %Q{Write Regular Expressions in a Hash and generats an optimized Regex}
|
gem.description = %Q{Write Regular Expressions in a Hash and generates an optimized Regex}
|
||||||
gem.email = "Denis.Knauf@gmail.com"
|
gem.email = "Denis.Knauf@gmail.com"
|
||||||
gem.homepage = "http://github.com/DenisKnauf/RegExpr"
|
gem.homepage = "http://github.com/DenisKnauf/RegExpr"
|
||||||
gem.authors = ["Denis Knauf"]
|
gem.authors = ["Denis Knauf"]
|
||||||
gem.files = ["README.md", "VERSION", "lib/**/*.rb", "test/**/*.rb"]
|
gem.files = %w[AUTHORS README.md VERSION lib/**/*.rb test/**/*.rb]
|
||||||
gem.require_paths = ["lib"]
|
gem.require_paths = %w[lib]
|
||||||
end
|
end
|
||||||
Jeweler::GemcutterTasks.new
|
Jeweler::GemcutterTasks.new
|
||||||
rescue LoadError
|
rescue LoadError
|
||||||
|
@ -38,11 +38,11 @@ rescue LoadError
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
task :test => :check_dependencies
|
#task :test => :check_dependencies
|
||||||
|
|
||||||
task :default => :test
|
#task :default => :test
|
||||||
|
|
||||||
require 'rake/rdoctask'
|
require 'rdoc/task'
|
||||||
Rake::RDocTask.new do |rdoc|
|
Rake::RDocTask.new do |rdoc|
|
||||||
if File.exist?('VERSION')
|
if File.exist?('VERSION')
|
||||||
version = File.read('VERSION')
|
version = File.read('VERSION')
|
||||||
|
@ -51,7 +51,7 @@ Rake::RDocTask.new do |rdoc|
|
||||||
end
|
end
|
||||||
|
|
||||||
rdoc.rdoc_dir = 'rdoc'
|
rdoc.rdoc_dir = 'rdoc'
|
||||||
rdoc.title = "sbdb #{version}"
|
rdoc.title = "RegExpr #{version}"
|
||||||
rdoc.rdoc_files.include('README*')
|
rdoc.rdoc_files.include('README*')
|
||||||
rdoc.rdoc_files.include('lib/**/*.rb')
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
||||||
end
|
end
|
||||||
|
|
2
VERSION
2
VERSION
|
@ -1 +1 @@
|
||||||
0.0.1
|
0.0.2
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
class RegExpr< Hash
|
class RegExpr< Hash
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# any thing
|
||||||
class RegExpr::Segment
|
class RegExpr::Segment
|
||||||
attr_accessor :value
|
attr_accessor :value
|
||||||
def initialize( val) self.value= val end
|
def initialize( val) self.value= val end
|
||||||
|
@ -22,12 +23,14 @@ class RegExpr::Segment
|
||||||
v
|
v
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# can't have any segment as value
|
||||||
def deepest
|
def deepest
|
||||||
self.class_eval do
|
self.class_eval do
|
||||||
def names() [] end
|
def names() [] end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# can't have any value
|
||||||
def novalue
|
def novalue
|
||||||
self.class_eval do
|
self.class_eval do
|
||||||
def initialize() end
|
def initialize() end
|
||||||
|
@ -45,6 +48,7 @@ class RegExpr::Segment
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# /(VALUE)/ or /(?:VALUE)/
|
||||||
class RegExpr::Block< RegExpr::Segment
|
class RegExpr::Block< RegExpr::Segment
|
||||||
attr_accessor :name, :hidden
|
attr_accessor :name, :hidden
|
||||||
def hidden?() @hidden end
|
def hidden?() @hidden end
|
||||||
|
@ -54,6 +58,10 @@ class RegExpr::Block< RegExpr::Segment
|
||||||
def empty?() @value.empty? end
|
def empty?() @value.empty? end
|
||||||
def size() @value.size end
|
def size() @value.size end
|
||||||
|
|
||||||
|
def inspect
|
||||||
|
"#<#{self.class.name}: (#{hidden? ? '?:' : ''} #{value.map(&:inspect).join ' '} )>"
|
||||||
|
end
|
||||||
|
|
||||||
def names
|
def names
|
||||||
names= @value.collect &:names
|
names= @value.collect &:names
|
||||||
names.push( name) unless self.hidden?
|
names.push( name) unless self.hidden?
|
||||||
|
@ -76,9 +84,11 @@ class RegExpr::Block< RegExpr::Segment
|
||||||
else list[ -1].push v
|
else list[ -1].push v
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
#return self
|
||||||
|
|
||||||
|
# [A,C,A,C,Cs,As,C,Cs,C] => [A,A,As], chars = [C,C|Cs]++[C|Cs]++[C]
|
||||||
list.delete_if do |v|
|
list.delete_if do |v|
|
||||||
if (RegExpr::Chars === v[ 0] and v.size == 1 ) or RegExpr::Char === v[ 0]
|
if (1 == v.size and RegExpr::Chars === v[ 0]) or RegExpr::Char === v[ 0]
|
||||||
chars+= v[ 0]
|
chars+= v[ 0]
|
||||||
else false
|
else false
|
||||||
end
|
end
|
||||||
|
@ -100,7 +110,7 @@ class RegExpr::Block< RegExpr::Segment
|
||||||
u ? w : w.value
|
u ? w : w.value
|
||||||
else w
|
else w
|
||||||
end
|
end
|
||||||
end.flatten
|
end.flatten.compact
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
values.push RegExpr::Or.new, chars if chars.size > 0
|
values.push RegExpr::Or.new, chars if chars.size > 0
|
||||||
|
@ -114,6 +124,7 @@ class RegExpr::Block< RegExpr::Segment
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# /(?!VALUE)/
|
||||||
class RegExpr::Not< RegExpr::Segment
|
class RegExpr::Not< RegExpr::Segment
|
||||||
deepest
|
deepest
|
||||||
novalue
|
novalue
|
||||||
|
@ -126,6 +137,7 @@ class RegExpr::Not< RegExpr::Segment
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# eg: 1..99 => /[1-9]|[1-9][1-9]/
|
||||||
class RegExpr::Range< RegExpr::Segment
|
class RegExpr::Range< RegExpr::Segment
|
||||||
novalue
|
novalue
|
||||||
attr_accessor :v1, :v2
|
attr_accessor :v1, :v2
|
||||||
|
@ -143,9 +155,9 @@ class RegExpr::Range< RegExpr::Segment
|
||||||
bf= b == 0 ? 1.0 : b.to_f
|
bf= b == 0 ? 1.0 : b.to_f
|
||||||
1.upto( b.to_s.length- 1) do |i|
|
1.upto( b.to_s.length- 1) do |i|
|
||||||
pot= 10** i
|
pot= 10** i
|
||||||
num= (af/ pot).ceil* pot # next higher number with i zeros
|
num= (af/ pot).ceil * pot # next higher number with i zeros
|
||||||
arr.insert i, num if num < @v2
|
arr.insert i, num if num < @v2
|
||||||
num= (bf/ pot).floor* pot # next lower number with i zeros
|
num= (bf/ pot).floor * pot # next lower number with i zeros
|
||||||
arr.insert -i, num
|
arr.insert -i, num
|
||||||
end
|
end
|
||||||
arr.uniq!
|
arr.uniq!
|
||||||
|
@ -153,15 +165,13 @@ class RegExpr::Range< RegExpr::Segment
|
||||||
|
|
||||||
result= RegExpr::Block.new
|
result= RegExpr::Block.new
|
||||||
0.upto( arr. length- 2) do |i|
|
0.upto( arr. length- 2) do |i|
|
||||||
first= arr[ i].to_s
|
first, second= arr[ i].to_s, (arr[ i+ 1]- 1).to_s
|
||||||
second= (arr[ i+ 1]- 1).to_s
|
|
||||||
result.push RegExpr::Or.new
|
result.push RegExpr::Or.new
|
||||||
0.upto( first.length- 1) do |j|
|
0.upto( first.length- 1) do |j|
|
||||||
result.push( if first[ j] == second[ j]
|
fst, sec= first[ j], second[ j]
|
||||||
RegExpr::Char.new first[ j].chr
|
result.push fst == sec ?
|
||||||
else
|
RegExpr::Char.new( fst.chr) :
|
||||||
RegExpr::Chars.new '%c-%c'% [ first[ j], second[ j] ]
|
RegExpr::Chars.new( '%c-%c'% [ fst, sec ])
|
||||||
end)
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
result. value. shift
|
result. value. shift
|
||||||
|
@ -169,6 +179,7 @@ class RegExpr::Range< RegExpr::Segment
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# /[CHARS]/ or /[^CHARS]/
|
||||||
class RegExpr::Chars< RegExpr::Segment
|
class RegExpr::Chars< RegExpr::Segment
|
||||||
deepest
|
deepest
|
||||||
attr_reader :chars, :not
|
attr_reader :chars, :not
|
||||||
|
@ -181,6 +192,10 @@ class RegExpr::Chars< RegExpr::Segment
|
||||||
def not!() @not= !@not end
|
def not!() @not= !@not end
|
||||||
alias -@ not!
|
alias -@ not!
|
||||||
|
|
||||||
|
def inspect
|
||||||
|
"#<#{self.class.name}: [#{value}]>"
|
||||||
|
end
|
||||||
|
|
||||||
def split
|
def split
|
||||||
chars= []
|
chars= []
|
||||||
@chars. gsub( /\\-/) do |r|
|
@chars. gsub( /\\-/) do |r|
|
||||||
|
@ -241,7 +256,9 @@ class RegExpr::Chars< RegExpr::Segment
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# /VALUE{MIN,MAX}/
|
||||||
class RegExpr::Repeat< RegExpr::Segment
|
class RegExpr::Repeat< RegExpr::Segment
|
||||||
|
SimpleChar= Hash[ *%w<{,1} ? {0,1} ? {0,} * {,} * {1,} +> + ['{1,1}', ''] ]
|
||||||
attr_reader :min, :max
|
attr_reader :min, :max
|
||||||
|
|
||||||
def minandmax x
|
def minandmax x
|
||||||
|
@ -265,8 +282,7 @@ class RegExpr::Repeat< RegExpr::Segment
|
||||||
def to_r
|
def to_r
|
||||||
t= '{%s,%s}'% [ @min||'', @max||'' ]
|
t= '{%s,%s}'% [ @min||'', @max||'' ]
|
||||||
return '' if '{0,0}' == t
|
return '' if '{0,0}' == t
|
||||||
t= Hash[ *%w<{,1} ? {0,1} ? {0,} * {,} * {1,} +>+ ['{1,1}', ''] ][ t]|| t
|
@value.to_r+ (SimpleChar[ t] || t)
|
||||||
@value.to_r+ t
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -343,15 +359,16 @@ class RegExpr
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def to_r exp= :main
|
def to_r exp= nil
|
||||||
r = self.to_re( exp)
|
r = self.to_re exp
|
||||||
#r.optimize!
|
r.optimize!
|
||||||
h, r = r.hidden?, r.to_r
|
h, r = r.hidden?, r.to_r
|
||||||
r = r[ 1...-1] unless h
|
r = r[ 1...-1] unless h
|
||||||
::Regexp.new r
|
::Regexp.new r
|
||||||
end
|
end
|
||||||
|
|
||||||
def to_re exp= :main
|
def to_re exp= nil
|
||||||
|
exp||= :main
|
||||||
u= RegExpr::Block.new
|
u= RegExpr::Block.new
|
||||||
t, u.hidden= if Symbol === exp
|
t, u.hidden= if Symbol === exp
|
||||||
u.name= exp.to_sym
|
u.name= exp.to_sym
|
||||||
|
@ -361,7 +378,11 @@ class RegExpr
|
||||||
end
|
end
|
||||||
else [ exp.to_s, true]
|
else [ exp.to_s, true]
|
||||||
end
|
end
|
||||||
|
parse t
|
||||||
|
end
|
||||||
|
|
||||||
|
def parse t, u= nil
|
||||||
|
u||= RegExpr::Block.new
|
||||||
until !t or t.empty?
|
until !t or t.empty?
|
||||||
v, t= self.to_r_next t
|
v, t= self.to_r_next t
|
||||||
case v
|
case v
|
||||||
|
@ -394,7 +415,8 @@ class RegExpr
|
||||||
i= exp[ 2.. -1].to_i h
|
i= exp[ 2.. -1].to_i h
|
||||||
return RegExpr::Char.new( i.chr), exp[ (i.to_s( h). size+ 2).. -1]
|
return RegExpr::Char.new( i.chr), exp[ (i.to_s( h). size+ 2).. -1]
|
||||||
|
|
||||||
when ?. then return RegExpr::WildCard.new( '.'), exp[ 1.. -1]
|
when ?.
|
||||||
|
return RegExpr::WildCard.new( '.'), exp[ 1.. -1]
|
||||||
|
|
||||||
when ?0
|
when ?0
|
||||||
case exp[ 1]
|
case exp[ 1]
|
||||||
|
@ -406,13 +428,17 @@ class RegExpr
|
||||||
return '', $1.to_i( 2).to_s+ $'
|
return '', $1.to_i( 2).to_s+ $'
|
||||||
else
|
else
|
||||||
case exp
|
case exp
|
||||||
when %r<(\d+)..(\d+)> then RegExpr::Range.new $1.to_i, $2.to_i
|
when %r<(\d+)..(\d+)>
|
||||||
when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat.new '', *$1.split( ',')
|
RegExpr::Range.new $1.to_i, $2.to_i
|
||||||
else raise ArgumentError, 'Unknown form "%s"'% exp
|
when %r<^(\d+,\d+|,\d+|\d+,?)>
|
||||||
|
RegExpr::Repeat.new '', *$1.split( ',')
|
||||||
|
else
|
||||||
|
raise ArgumentError, 'Unknown form "%s"'% exp
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
when ?( then return self.to_re( exp[ 1.. -1])
|
when ?(
|
||||||
|
return parse( exp[ 1.. -1])
|
||||||
when ?) then ')'
|
when ?) then ')'
|
||||||
when ?| then RegExpr::Or.new
|
when ?| then RegExpr::Or.new
|
||||||
|
|
||||||
|
@ -420,19 +446,26 @@ class RegExpr
|
||||||
when ?* then RegExpr::Repeat.new '', nil
|
when ?* then RegExpr::Repeat.new '', nil
|
||||||
when ?? then RegExpr::Repeat.new '', 0, 1
|
when ?? then RegExpr::Repeat.new '', 0, 1
|
||||||
|
|
||||||
when ?" then RegExpr::Char.new %r<^"((?:[^"]|\\")*)">.match( exp)[ 1]
|
when ?"
|
||||||
when ?[ then RegExpr::Chars.new %r<^\[((?:[^\]]|\\\])*[^\\]|)\]>.match( exp)[ 1]
|
RegExpr::Char.new %r<^"((?:[^"]|\\")*)">.match( exp)[ 1]
|
||||||
when ?/ then exp =~ %r<^/((?:[^/]|\\/)*)/(im?|mi)?>
|
when ?[
|
||||||
RegExpr::Regexp.new ::Regexp.new( $1,
|
RegExpr::Chars.new %r<^\[((?:[^\]]|\\\])*[^\\]|)\]>.match( exp)[ 1]
|
||||||
($2 =~ /i/ ? ::Regexp::IGNORECASE : 0)+
|
when ?/
|
||||||
($2 =~ /m/ ? ::Regexp::MULTILINE : 0))
|
_, re, f= %r<^/((?:[^/]|\\/)*)/(im?|mi)?>.match( exp)
|
||||||
|
flg= $2=~ /i/ ? ::Regexp::IGNORECASE : 0
|
||||||
|
flg+= $2=~ /m/ ? ::Regexp::MULTILINE : 0
|
||||||
|
RegExpr::Regexp.new ::Regexp.new( re, flg)
|
||||||
|
|
||||||
else
|
else
|
||||||
case exp
|
case exp
|
||||||
when %r<^([a-z_][a-z_0-9]*\b)>i then self.to_re $1.to_sym
|
when %r<^([a-z_][a-z_0-9]*\b)>i
|
||||||
when %r<(\d+)..(\d+)> then RegExpr::Range.new $1.to_i, $2.to_i
|
self.to_re $1.to_sym
|
||||||
when %r<^(\d+,\d+|,\d+|\d+,?)> then RegExpr::Repeat.new '', *$1.split( ',')
|
when %r<(\d+)..(\d+)>
|
||||||
else raise ArgumentError, 'Unknown form "%s"'% exp
|
RegExpr::Range.new $1.to_i, $2.to_i
|
||||||
|
when %r<^(\d+,\d+|,\d+|\d+,?)>
|
||||||
|
RegExpr::Repeat.new '', *$1.split( ',')
|
||||||
|
else
|
||||||
|
raise ArgumentError, 'Unknown form "%s"'% exp
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
[ t, $' ]
|
[ t, $' ]
|
||||||
|
|
6
tests/optimizer.rb
Normal file
6
tests/optimizer.rb
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
class OptimizerTest < Test::Unit::TestCase
|
||||||
|
context "Optimizer" do
|
||||||
|
should "optimize many charblocks to one charblock" do
|
||||||
|
/[a-dt-z]/.to_s == RegExpr[ main: '[a-d] | [t-z]'].to_r.to_s
|
||||||
|
end
|
||||||
|
end
|
Loading…
Add table
Add a link
Reference in a new issue