2010-09-05 15:24:15 -05:00
|
|
|
require 'strscan'

require 'active_support/core_ext/object/blank'
require 'active_support/ordered_hash'
|
|
|
|
|
2009-02-27 19:23:00 -06:00
|
|
|
module RailsGuides
  # Numbers the section headers found in a guide body and records how they
  # nest.
  #
  # A header is any text matched by HEADER_PATTERN: "hN. Title" or
  # "hN(#anchor). Title". Calling #index rewrites every numbered header in
  # +result+ to the form "hN(#anchor). 1.2 Title" and stores the nested
  # header tree in +level_hash+. +body+ itself is never modified; +result+
  # starts out as a copy of it.
  class Indexer
    # Captures: 1 = level digit, 2 = anchor including the leading "#" (nil
    # when the header has none), 3 = title text.
    #
    # NOTE(review): the pattern is applied by StringScanner at every scan
    # position; how "^" behaves away from the start of the scanned string
    # depends on StringScanner's anchor handling - confirm whether headers
    # that do not start a line are meant to match.
    HEADER_PATTERN = %r{^h(\d)(?:\((#.*?)\))?\s*\.\s*(.*)$}

    attr_reader :body, :result, :warnings, :level_hash

    # body     - the guide source text to index.
    # warnings - when truthy, a message is printed for every header whose
    #            generated anchor comes out empty.
    def initialize(body, warnings)
      @body     = body
      @result   = @body.dup
      @warnings = warnings
    end

    # Indexes the whole body starting at header level 3.
    #
    # Returns the header tree, which is also kept in +level_hash+. Rewritten
    # headers are available through +result+.
    def index
      @level_hash = process(body)
    end

    private

    # Walks +string+ one character at a time looking for headers.
    #
    # string        - text to scan.
    # current_level - header level being numbered by this call.
    # counters      - running section numbers, one entry per nesting depth;
    #                 mutated in place and shared with recursive calls.
    #
    # Returns an ordered hash mapping {:title, :id} keys to the subtree of
    # headers nested under each of them. Headers deeper than +current_level+
    # that are met directly by this call are ignored.
    #
    # Raises RuntimeError ("Parsing Fail") when the matched header text
    # cannot be found in +result+.
    def process(string, current_level=3, counters=[1])
      scanner    = StringScanner.new(string)
      level_hash = ActiveSupport::OrderedHash.new

      until scanner.eos?
        if scanner.match?(HEADER_PATTERN)
          matched = scanner.matched
          level   = scanner[1].to_i
          idx     = scanner[2]
          title   = scanner[3].strip

          # A shallower header closes this subtree. The caller scans its own
          # copy of the text and has not moved past this header yet, so it
          # will meet the same header again and handle it at its own level.
          return level_hash if level < current_level

          if level == current_level
            number = counters.join(".")
            idx  ||= '#' + title_to_idx(title)

            # Block form of sub!: the new header is inserted literally, so
            # backslashes in the title or anchor are not treated as
            # back-references the way they are in a replacement string.
            numbered = "h#{level}(#{idx}). #{number} #{title}"
            raise "Parsing Fail" unless @result.sub!(matched) { numbered }

            key = { :title => title, :id => idx }

            # Recurse into the text after this header with a fresh counter
            # for the next depth, then discard that counter again.
            counters << 1
            level_hash[key] = process(scanner.post_match, current_level + 1, counters)
            counters.pop

            # Advance the section number at the current depth.
            counters[-1] += 1
          end
        end

        # match? does not move the scan pointer, so step forward by one
        # character whether or not a header was found here.
        scanner.getch
      end

      level_hash
    end

    # Derives an anchor name (without the leading "#") from a header title:
    # lower-cased, runs of whitespace and each underscore turned into "-",
    # every character other than a-z, 0-9 and "-" removed, and any leading
    # non-letters stripped.
    #
    # Prints a notice when +warnings+ is truthy and nothing is left.
    # Returns the (possibly empty) anchor string.
    def title_to_idx(title)
      idx = title.strip.downcase.gsub(/\s+|_/, '-').delete('^a-z0-9-').sub(/^[^a-z]*/, '')
      if warnings && idx.blank?
        puts "BLANK ID: please put an explicit ID for section #{title}, as in h5(#my-id)"
      end
      idx
    end
  end
end
|