Small tweaks to Maruku

A real speedup could be had by redoing output/to_html.rb
This commit is contained in:
Jacques Distler 2011-08-08 01:44:01 -05:00
parent 47996ea1d3
commit 201c25ce83
4 changed files with 176 additions and 81 deletions

View file

@ -47,7 +47,9 @@ module MaRuKu
:latex_cache_file => "blahtex_cache.pstore", # cache file for blahtex filter
:debug_keep_ials => false,
:doc_prefix => ''
:doc_prefix => '',
:ignore_wikilinks => true
}
class MDElement

View file

@ -86,8 +86,8 @@ MaRuKu::In::Markdown.register_block_extension(
# This adds support for \eqref
RegEqrefLatex = /\\eqref\{(\w+)\}/
RegEqPar = /\(eq:(\w+)\)/
RegEqrefLatex = /\\eqref\{(\w+?)\}/
RegEqPar = /\(eq:(\w+?)\)/
RegEqref = Regexp.union(RegEqrefLatex, RegEqPar)
MaRuKu::In::Markdown.register_span_extension(
@ -101,7 +101,7 @@ MaRuKu::In::Markdown.register_span_extension(
end)
# This adds support for \ref
RegRef = /\\ref\{(\w*)\}/
RegRef = /\\ref\{(\w*?)\}/
MaRuKu::In::Markdown.register_span_extension(
:chars => [?\\, ?(],
:regexp => RegRef,

View file

@ -33,8 +33,8 @@ class CharSourceDebug; end
# Choose!
CharSource = CharSourceManual # faster! 58ms vs. 65ms
#CharSource = CharSourceStrscan
#CharSource = CharSourceManual # faster! 58ms vs. 65ms
CharSource = CharSourceStrscan # Seems faster on LONG documents (where we care)
#CharSource = CharSourceDebug
@ -202,76 +202,87 @@ end
require 'strscan'
class CharSourceStrscan
include SpanLevelParser
include MaRuKu::Strings
def initialize(s, parent=nil)
@s = StringScanner.new(s)
@parent = parent
@scanner = StringScanner.new(s)
@size = s.size
end
# Return current char as a FixNum (or nil).
def cur_char
@s.peek(1)[0]
end
# Look at the character under the scan pointer without consuming it.
# Returns nil when nothing is left (the peek result is then empty).
def cur_char
  lookahead = @scanner.peek(1)
  lookahead[0]
end
# Return the next n chars as a String.
def cur_chars(n);
@s.peek(n)
end
# Return up to n upcoming chars as a String; the scan pointer does not move.
# Fewer than n are returned when the input runs out first.
def cur_chars(n)
  @scanner.peek(n)
end
# Return the char after current char as a FixNum (or nil).
def next_char;
@s.peek(2)[1]
end
# Look one position past the current char without consuming anything.
# Returns nil when fewer than two chars remain.
def next_char
  pair = @scanner.peek(2)
  pair[1]
end
def shift_char
(@s.get_byte)[0]
end
# Consume one character and return it, moving the scan pointer past it.
# The value is element 0 of what StringScanner#getch hands back.
def shift_char
  consumed = @scanner.getch
  consumed[0]
end
def ignore_char
@s.get_byte
nil
end
# Advance the pointer by one position, discarding whatever is there.
# At end of input this does nothing: StringScanner#pos= raises RangeError
# when asked to move past the end of the string, so the bump is skipped.
# Always returns nil.
def ignore_char
  @scanner.pos += 1 unless @scanner.eos?
  nil
end
def ignore_chars(n)
n.times do @s.get_byte end
nil
end
# Advance the pointer by n positions, discarding what is skipped.
# The step is capped at the amount of input left, because
# StringScanner#pos= raises RangeError for a position past the end.
# Always returns nil.
def ignore_chars(n)
  @scanner.pos += [n, @scanner.rest_size].min
  nil
end
def current_remaining_buffer
@s.rest #nil #@buffer[@buffer_index, @buffer.size-@buffer_index]
end
# Return the rest of the string, i.e. everything from the scan pointer onward.
def current_remaining_buffer
  @scanner.rest
end
def cur_chars_are(string)
cur_chars(string.size) == string
end
# Returns true if the upcoming input (string.size of it) equals string.
# Nothing is consumed.
def cur_chars_are(string)
  upcoming = @scanner.peek(string.size)
  upcoming == string
end
def next_matches(r)
len = @s.match?(r)
return !!len
end
# Returns true if Regexp r matches at the scan pointer, false otherwise.
# The pointer is left where it was.
def next_matches(r)
  @scanner.match?(r) ? true : false
end
def read_regexp(r)
string = @s.scan(r)
if string
return r.match(string)
else
return nil
end
end
# Consume the text matching r at the scan pointer and return a MatchData
# built by re-matching r against just that consumed text.
# When r does not match here the pointer stays put and nil is returned.
def read_regexp(r)
  consumed = @scanner.scan(r)
  r.match(consumed)
end
def consume_whitespace
@s.scan(/\s+/)
nil
end
# Skip any run of whitespace at the scan pointer.
# Returns the length skipped, which is 0 when there was none.
def consume_whitespace
  @scanner.skip(/\s*/)
end
def describe
describe_pos(@s.string, @s.pos)
len = 75
num_before = [len/2, @scanner.pos].min
num_after = [len/2, @scanner.rest_size].min
num_before_max = @scanner.pos
num_after_max = @scanner.rest_size
num_before = [num_before_max, len-num_after].min
num_after = [num_after_max, len-num_before].min
index_start = [@scanner.pos - num_before, 0].max
index_end = [@scanner.pos + num_after, @size].min
size = index_end- index_start
str = @scanner.string[index_start, size]
str.gsub!("\n",'N')
str.gsub!("\t",'T')
if index_end == @size
str += "EOF"
end
end
pre_s = @scanner.pos-index_start
pre_s = [pre_s, 0].max
pre_s2 = [len-pre_s,0].max
# puts "pre_S = #{pre_s}"
pre =" "*(pre_s)
"-"*len+"\n"+
str + "\n" +
"-"*pre_s + "|" + "-"*(pre_s2)+"\n"+
# pre + "|\n"+
pre + "+--- Byte #{@scanner.pos}\n"+
"Shown bytes [#{index_start} to #{size}] of #{@size}:\n"+
@scanner.string.gsub(/^/, ">")
# "CharSource: At character #{@buffer_index} of block "+
# " beginning with:\n #{@buffer[0,50].inspect} ...\n"+
# " before: \n ... #{cur_chars(50).inspect} ... "
end
end
class CharSourceDebug
def initialize(s, parent)

View file

@ -24,6 +24,16 @@ require 'set'
module MaRuKu; module In; module Markdown; module SpanLevelParser
include MaRuKu::Helpers
# Concatenates to a string
class SpanContext_String; end
# Pushes to an Array, and then calls #join to output
class SpanContext_Array; end
# You choose...
#SpanContext = SpanContext_Array
SpanContext = SpanContext_String # Seems to be faster
EscapedCharInText =
Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>]
@ -32,6 +42,8 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser
EscapedCharInInlineCode = [?\\,?`]
IgnoreWikiLinks = MaRuKu::Globals[:ignore_wikilinks]
def parse_lines_as_span(lines, parent=nil)
parse_span_better lines.join("\n"), parent
end
@ -60,7 +72,7 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser
# This is only an optimization which cuts 50% of the time used.
# (but you can't use a-zA-z in exit_on_chars)
if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
con.cur_string << src.shift_char
con.push_char src.shift_char
next
end
@ -142,6 +154,9 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser
when ?[
if markdown_extra? && src.next_char == ?^
read_footnote_ref(src,con)
elsif IgnoreWikiLinks && src.next_char == ?[
con.push_char src.shift_char
con.push_char src.shift_char
else
read_link(src, con)
end
@ -191,7 +206,7 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser
# or 2) the last char was a space
# or 3) the current string is empty
#if con.elements.empty? ||
if (con.cur_string =~ /\s\Z/) || (con.cur_string.size == 0)
if con.is_end?
# also, we check the next characters
follows = src.cur_chars(4)
if follows =~ /^\_\_\_[^\s\_]/
@ -678,16 +693,15 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser
end # read link
class SpanContext
class SpanContext_Array
include MaRuKu::Strings
# Read elements
attr_accessor :elements
attr_accessor :cur_string
def initialize
@elements = []
@cur_string = ""
@cur_string_array = []
end
def push_element(e)
@ -703,17 +717,86 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser
def push_elements(a)
for e in a
if e.kind_of? String
e.each_byte do |b| push_char b end
@cur_string_array << e # e.each_byte do |b| push_char b end
else
push_element e
end
end
end
# Truthy when no text is pending, or when the most recently pushed
# fragment ends in whitespace. Returns true for the empty case, otherwise
# the result of the regexp match (an index or nil).
def is_end?
  return true if @cur_string_array.empty?
  @cur_string_array.last =~ /\s\Z/
end
def push_string_if_present
if @cur_string.size > 0
unless @cur_string_array.empty?
@elements << @cur_string_array.join
@cur_string_array = []
end
nil
end
# Append one character (converted with #chr) to the pending fragments.
# Always returns nil.
def push_char(c)
  @cur_string_array.push(c.chr)
  nil
end
# Append a single space to the pending fragments, unless the last
# fragment already ends with one. Returns nil when nothing was added.
def push_space
  tail = @cur_string_array.last
  return nil if tail =~ /\ \Z/
  @cur_string_array << ' '
end
# Build a human-readable dump of this context: every collected element
# (via #inspect, one per line) followed by the pending text, if any.
def describe
  listing = @elements.map { |element| element.inspect }.join("\n")
  report = "Elements read in span: \n" + listing.gsub(/^/, ' -') + "\n"
  unless @cur_string_array.empty?
    report += "Current string: \n #{@cur_string_array.join.inspect}\n"
  end
  report
end
end # SpanContext_Array
class SpanContext_String
include MaRuKu::Strings
# Read elements
attr_accessor :elements
# Start with no collected elements and an empty pending-text buffer.
def initialize
  @elements = Array.new
  @cur_string = ''
end
# Append one element (a String or an MDElement) to the collected elements.
# Any pending text is flushed first via push_string_if_present.
# Raises RuntimeError for any other kind of object. Always returns nil.
def push_element(e)
  unless e.kind_of?(String) || e.kind_of?(MDElement)
    raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} "
  end
  push_string_if_present
  @elements << e
  nil
end
alias push push_element
# Push every item of a: Strings are appended straight onto the pending
# text buffer, anything else goes through push_element.
def push_elements(a)
  a.each do |item|
    if item.kind_of?(String)
      @cur_string << item
    else
      push_element(item)
    end
  end
end
# Truthy when the pending text is empty or ends in whitespace.
# Returns true for the empty case, otherwise the result of the regexp
# match (an index or nil).
def is_end?
  return true if @cur_string.empty?
  @cur_string =~ /\s\Z/
end
def push_string_if_present
unless @cur_string.empty?
@elements << @cur_string
@cur_string = ""
@cur_string = ''
end
nil
end
@ -726,8 +809,8 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser
# push space into current string if
# there isn't one
def push_space
last = @cur_string[@cur_string.size-1]
@cur_string << ?\ if last != ?\
last = @cur_string[@cur_string.length - 1]
@cur_string << ' ' unless last == ?\
end
def describe
@ -735,12 +818,11 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser
s = "Elements read in span: \n" +
lines.gsub(/^/, ' -')+"\n"
if @cur_string.size > 0
s += "Current string: \n #{@cur_string.inspect}\n"
end
# This class keeps its pending text in @cur_string (a String), not in an
# array of fragments, so report that buffer when it is non-empty.
s += "Current string: \n #{@cur_string.inspect}\n" unless @cur_string.empty?
s
end
end # SpanContext
end # SpanContext_String
end end end end # module MaRuKu; module In; module Markdown; module SpanLevelParser