Small tweaks to Maruku

A real speedup could be had by redoing output/to_html.rb
This commit is contained in:
Jacques Distler 2011-08-08 01:44:01 -05:00
parent 47996ea1d3
commit 201c25ce83
4 changed files with 176 additions and 81 deletions

View file

@ -47,7 +47,9 @@ module MaRuKu
:latex_cache_file => "blahtex_cache.pstore", # cache file for blahtex filter :latex_cache_file => "blahtex_cache.pstore", # cache file for blahtex filter
:debug_keep_ials => false, :debug_keep_ials => false,
:doc_prefix => '' :doc_prefix => '',
:ignore_wikilinks => true
} }
class MDElement class MDElement

View file

@ -86,8 +86,8 @@ MaRuKu::In::Markdown.register_block_extension(
# This adds support for \eqref # This adds support for \eqref
RegEqrefLatex = /\\eqref\{(\w+)\}/ RegEqrefLatex = /\\eqref\{(\w+?)\}/
RegEqPar = /\(eq:(\w+)\)/ RegEqPar = /\(eq:(\w+?)\)/
RegEqref = Regexp.union(RegEqrefLatex, RegEqPar) RegEqref = Regexp.union(RegEqrefLatex, RegEqPar)
MaRuKu::In::Markdown.register_span_extension( MaRuKu::In::Markdown.register_span_extension(
@ -101,7 +101,7 @@ MaRuKu::In::Markdown.register_span_extension(
end) end)
# This adds support for \ref # This adds support for \ref
RegRef = /\\ref\{(\w*)\}/ RegRef = /\\ref\{(\w*?)\}/
MaRuKu::In::Markdown.register_span_extension( MaRuKu::In::Markdown.register_span_extension(
:chars => [?\\, ?(], :chars => [?\\, ?(],
:regexp => RegRef, :regexp => RegRef,

View file

@ -33,8 +33,8 @@ class CharSourceDebug; end
# Choose! # Choose!
CharSource = CharSourceManual # faster! 58ms vs. 65ms #CharSource = CharSourceManual # faster! 58ms vs. 65ms
#CharSource = CharSourceStrscan CharSource = CharSourceStrscan # Seems faster on LONG documents (where we care)
#CharSource = CharSourceDebug #CharSource = CharSourceDebug
@ -202,76 +202,87 @@ end
require 'strscan' require 'strscan'
class CharSourceStrscan class CharSourceStrscan
include SpanLevelParser
include MaRuKu::Strings
def initialize(s, parent=nil) def initialize(s, parent=nil)
@s = StringScanner.new(s) @scanner = StringScanner.new(s)
@parent = parent @size = s.size
end end
# Return current char as a FixNum (or nil). # Return current char as a FixNum (or nil).
def cur_char def cur_char; @scanner.peek(1)[0]; end
@s.peek(1)[0]
end
# Return the next n chars as a String. # Return the next n chars as a String.
def cur_chars(n); def cur_chars(n); @scanner.peek(n); end
@s.peek(n)
end
# Return the char after current char as a FixNum (or nil). # Return the char after current char as a FixNum (or nil).
def next_char; def next_char; @scanner.peek(2)[1]; end
@s.peek(2)[1]
end
def shift_char # Return a character as a FixNum, advancing the pointer.
(@s.get_byte)[0] def shift_char; @scanner.getch[0]; end
end
def ignore_char # Advance the pointer
@s.get_byte def ignore_char; @scanner.pos= @scanner.pos + 1; end
nil
end
def ignore_chars(n) # Advance the pointer by n
n.times do @s.get_byte end def ignore_chars(n); @scanner.pos= @scanner.pos + n; end
nil
end
def current_remaining_buffer # Return the rest of the string
@s.rest #nil #@buffer[@buffer_index, @buffer.size-@buffer_index] def current_remaining_buffer; @scanner.rest; end
end
def cur_chars_are(string) # Returns true if string matches what we're pointing to
cur_chars(string.size) == string def cur_chars_are(string); @scanner.peek(string.size) == string; end
end
def next_matches(r) # Returns true if Regexp r matches what we're pointing to
len = @s.match?(r) def next_matches(r); !!@scanner.check(r); end
return !!len
end
def read_regexp(r) def read_regexp(r); r.match(@scanner.scan(r)); end
string = @s.scan(r)
if string
return r.match(string)
else
return nil
end
end
def consume_whitespace def consume_whitespace; @scanner.skip(/\s*/); end
@s.scan(/\s+/)
nil
end
def describe def describe
describe_pos(@s.string, @s.pos) len = 75
num_before = [len/2, @scanner.pos].min
num_after = [len/2, @scanner.rest_size].min
num_before_max = @scanner.pos
num_after_max = @scanner.rest_size
num_before = [num_before_max, len-num_after].min
num_after = [num_after_max, len-num_before].min
index_start = [@scanner.pos - num_before, 0].max
index_end = [@scanner.pos + num_after, @size].min
size = index_end- index_start
str = @scanner.string[index_start, size]
str.gsub!("\n",'N')
str.gsub!("\t",'T')
if index_end == @size
str += "EOF"
end end
end pre_s = @scanner.pos-index_start
pre_s = [pre_s, 0].max
pre_s2 = [len-pre_s,0].max
# puts "pre_S = #{pre_s}"
pre =" "*(pre_s)
"-"*len+"\n"+
str + "\n" +
"-"*pre_s + "|" + "-"*(pre_s2)+"\n"+
# pre + "|\n"+
pre + "+--- Byte #{@scanner.pos}\n"+
"Shown bytes [#{index_start} to #{size}] of #{@size}:\n"+
@scanner.string.gsub(/^/, ">")
# "CharSource: At character #{@buffer_index} of block "+
# " beginning with:\n #{@buffer[0,50].inspect} ...\n"+
# " before: \n ... #{cur_chars(50).inspect} ... "
end
end
class CharSourceDebug class CharSourceDebug
def initialize(s, parent) def initialize(s, parent)

View file

@ -24,6 +24,16 @@ require 'set'
module MaRuKu; module In; module Markdown; module SpanLevelParser module MaRuKu; module In; module Markdown; module SpanLevelParser
include MaRuKu::Helpers include MaRuKu::Helpers
# Concatenates to a string
class SpanContext_String; end
# Pushes to an Array, and then calls #join to output
class SpanContext_Array; end
# You choose...
#SpanContext = SpanContext_Array
SpanContext = SpanContext_String # Seems to be faster
EscapedCharInText = EscapedCharInText =
Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>] Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>]
@ -32,6 +42,8 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser
EscapedCharInInlineCode = [?\\,?`] EscapedCharInInlineCode = [?\\,?`]
IgnoreWikiLinks = MaRuKu::Globals[:ignore_wikilinks]
def parse_lines_as_span(lines, parent=nil) def parse_lines_as_span(lines, parent=nil)
parse_span_better lines.join("\n"), parent parse_span_better lines.join("\n"), parent
end end
@ -60,7 +72,7 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser
# This is only an optimization which cuts 50% of the time used. # This is only an optimization which cuts 50% of the time used.
# (but you can't use a-zA-Z in exit_on_chars) # (but you can't use a-zA-Z in exit_on_chars)
if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z))) if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
con.cur_string << src.shift_char con.push_char src.shift_char
next next
end end
@ -142,6 +154,9 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser
when ?[ when ?[
if markdown_extra? && src.next_char == ?^ if markdown_extra? && src.next_char == ?^
read_footnote_ref(src,con) read_footnote_ref(src,con)
elsif IgnoreWikiLinks && src.next_char == ?[
con.push_char src.shift_char
con.push_char src.shift_char
else else
read_link(src, con) read_link(src, con)
end end
@ -191,7 +206,7 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser
# or 2) the last char was a space # or 2) the last char was a space
# or 3) the current string is empty # or 3) the current string is empty
#if con.elements.empty? || #if con.elements.empty? ||
if (con.cur_string =~ /\s\Z/) || (con.cur_string.size == 0) if con.is_end?
# also, we check the next characters # also, we check the next characters
follows = src.cur_chars(4) follows = src.cur_chars(4)
if follows =~ /^\_\_\_[^\s\_]/ if follows =~ /^\_\_\_[^\s\_]/
@ -678,16 +693,15 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser
end # read link end # read link
class SpanContext class SpanContext_Array
include MaRuKu::Strings include MaRuKu::Strings
# Read elements # Read elements
attr_accessor :elements attr_accessor :elements
attr_accessor :cur_string
def initialize def initialize
@elements = [] @elements = []
@cur_string = "" @cur_string_array = []
end end
def push_element(e) def push_element(e)
@ -703,17 +717,86 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser
def push_elements(a) def push_elements(a)
for e in a for e in a
if e.kind_of? String if e.kind_of? String
e.each_byte do |b| push_char b end @cur_string_array << e # e.each_byte do |b| push_char b end
else else
push_element e push_element e
end end
end end
end end
def is_end?
@cur_string_array.empty? || @cur_string_array.last =~ /\s\Z/
end
def push_string_if_present def push_string_if_present
if @cur_string.size > 0 unless @cur_string_array.empty?
@elements << @cur_string_array.join
@cur_string_array = []
end
nil
end
def push_char(c)
@cur_string_array << c.chr
nil
end
# push space into current string if
# there isn't one
def push_space
last = @cur_string_array.last
@cur_string_array << ' ' unless last =~ /\ \Z/
end
def describe
lines = @elements.map{|x| x.inspect}.join("\n")
s = "Elements read in span: \n" +
lines.gsub(/^/, ' -')+"\n"
s += "Current string: \n #{@cur_string_array.join.inspect}\n" unless @cur_string_array.empty?
s
end
end # SpanContext_Array
class SpanContext_String
include MaRuKu::Strings
# Read elements
attr_accessor :elements
def initialize
@elements = []
@cur_string = ''
end
def push_element(e)
raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} " if
not (e.kind_of?(String) or e.kind_of?(MDElement))
push_string_if_present
@elements << e
nil
end
alias push push_element
def push_elements(a)
for e in a
if e.kind_of? String
@cur_string << e # e.each_byte do |b| push_char b end
else
push_element e
end
end
end
def is_end?
@cur_string.empty? || @cur_string =~ /\s\Z/
end
def push_string_if_present
unless @cur_string.empty?
@elements << @cur_string @elements << @cur_string
@cur_string = "" @cur_string = ''
end end
nil nil
end end
@ -726,8 +809,8 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser
# push space into current string if # push space into current string if
# there isn't one # there isn't one
def push_space def push_space
last = @cur_string[@cur_string.size-1] last = @cur_string[@cur_string.length - 1]
@cur_string << ?\ if last != ?\ @cur_string << ' ' unless last == ?\
end end
def describe def describe
@ -735,12 +818,11 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser
s = "Elements read in span: \n" + s = "Elements read in span: \n" +
lines.gsub(/^/, ' -')+"\n" lines.gsub(/^/, ' -')+"\n"
if @cur_string.size > 0 s += "Current string: \n #{@cur_string_array.join.inspect}\n" unless @cur_string_array.empty?
s += "Current string: \n #{@cur_string.inspect}\n"
end
s s
end end
end # SpanContext end # SpanContext_String
end end end end # module MaRuKu; module In; module Markdown; module SpanLevelParser end end end end # module MaRuKu; module In; module Markdown; module SpanLevelParser