From 201c25ce835949c7da27fe57e9bbf9150152668d Mon Sep 17 00:00:00 2001 From: Jacques Distler Date: Mon, 8 Aug 2011 01:44:01 -0500 Subject: [PATCH] Small tweaks to Maruku A real speedup could be had by redoing output/to_html.rb --- vendor/plugins/maruku/lib/maruku/defaults.rb | 4 +- .../maruku/lib/maruku/ext/math/parsing.rb | 6 +- .../maruku/lib/maruku/input/charsource.rb | 135 ++++++++++-------- .../lib/maruku/input/parse_span_better.rb | 112 +++++++++++++-- 4 files changed, 176 insertions(+), 81 deletions(-) diff --git a/vendor/plugins/maruku/lib/maruku/defaults.rb b/vendor/plugins/maruku/lib/maruku/defaults.rb index b0203b02..d892ea7d 100644 --- a/vendor/plugins/maruku/lib/maruku/defaults.rb +++ b/vendor/plugins/maruku/lib/maruku/defaults.rb @@ -47,7 +47,9 @@ module MaRuKu :latex_cache_file => "blahtex_cache.pstore", # cache file for blahtex filter :debug_keep_ials => false, - :doc_prefix => '' + :doc_prefix => '', + + :ignore_wikilinks => true } class MDElement diff --git a/vendor/plugins/maruku/lib/maruku/ext/math/parsing.rb b/vendor/plugins/maruku/lib/maruku/ext/math/parsing.rb index 2aaf7efc..9f3c6e49 100644 --- a/vendor/plugins/maruku/lib/maruku/ext/math/parsing.rb +++ b/vendor/plugins/maruku/lib/maruku/ext/math/parsing.rb @@ -86,8 +86,8 @@ MaRuKu::In::Markdown.register_block_extension( # This adds support for \eqref -RegEqrefLatex = /\\eqref\{(\w+)\}/ -RegEqPar = /\(eq:(\w+)\)/ +RegEqrefLatex = /\\eqref\{(\w+?)\}/ +RegEqPar = /\(eq:(\w+?)\)/ RegEqref = Regexp.union(RegEqrefLatex, RegEqPar) MaRuKu::In::Markdown.register_span_extension( @@ -101,7 +101,7 @@ MaRuKu::In::Markdown.register_span_extension( end) # This adds support for \ref -RegRef = /\\ref\{(\w*)\}/ +RegRef = /\\ref\{(\w*?)\}/ MaRuKu::In::Markdown.register_span_extension( :chars => [?\\, ?(], :regexp => RegRef, diff --git a/vendor/plugins/maruku/lib/maruku/input/charsource.rb b/vendor/plugins/maruku/lib/maruku/input/charsource.rb index 14a64735..1b6c256d 100644 --- 
a/vendor/plugins/maruku/lib/maruku/input/charsource.rb +++ b/vendor/plugins/maruku/lib/maruku/input/charsource.rb @@ -33,8 +33,8 @@ class CharSourceDebug; end # Choose! -CharSource = CharSourceManual # faster! 58ms vs. 65ms -#CharSource = CharSourceStrscan +#CharSource = CharSourceManual # faster! 58ms vs. 65ms +CharSource = CharSourceStrscan # Seems faster on LONG documents (where we care) #CharSource = CharSourceDebug @@ -202,76 +202,87 @@ end require 'strscan' class CharSourceStrscan - include SpanLevelParser - include MaRuKu::Strings - + def initialize(s, parent=nil) - @s = StringScanner.new(s) - @parent = parent - end - - # Return current char as a FixNum (or nil). - def cur_char - @s.peek(1)[0] + @scanner = StringScanner.new(s) + @size = s.size end + # Return current char as a FixNum (or nil). + def cur_char; @scanner.peek(1)[0]; end + # Return the next n chars as a String. - def cur_chars(n); - @s.peek(n) - end + def cur_chars(n); @scanner.peek(n); end # Return the char after current char as a FixNum (or nil). - def next_char; - @s.peek(2)[1] - end - - def shift_char - (@s.get_byte)[0] - end - - def ignore_char - @s.get_byte - nil - end - - def ignore_chars(n) - n.times do @s.get_byte end - nil - end - - def current_remaining_buffer - @s.rest #nil #@buffer[@buffer_index, @buffer.size-@buffer_index] - end - - def cur_chars_are(string) - cur_chars(string.size) == string - end + def next_char; @scanner.peek(2)[1]; end + + # Return a character as a FixNum, advancing the pointer. 
+ def shift_char; @scanner.getch[0]; end + + # Advance the pointer + def ignore_char; @scanner.pos= @scanner.pos + 1; end + + # Advance the pointer by n + def ignore_chars(n); @scanner.pos= @scanner.pos + n; end + + # Return the rest of the string + def current_remaining_buffer; @scanner.rest; end + + # Returns true if string matches what we're pointing to + def cur_chars_are(string); @scanner.peek(string.size) == string; end + + # Returns true if Regexp r matches what we're pointing to + def next_matches(r); !!@scanner.check(r); end + + def read_regexp(r); r.match(@scanner.scan(r)); end + + def consume_whitespace; @scanner.skip(/\s*/); end - def next_matches(r) - len = @s.match?(r) - return !!len - end - - def read_regexp(r) - string = @s.scan(r) - if string - return r.match(string) - else - return nil - end - end - - def consume_whitespace - @s.scan(/\s+/) - nil - end - def describe - describe_pos(@s.string, @s.pos) - end + len = 75 + num_before = [len/2, @scanner.pos].min + num_after = [len/2, @scanner.rest_size].min + num_before_max = @scanner.pos + num_after_max = @scanner.rest_size -end + num_before = [num_before_max, len-num_after].min + num_after = [num_after_max, len-num_before].min + + index_start = [@scanner.pos - num_before, 0].max + index_end = [@scanner.pos + num_after, @size].min + + size = index_end- index_start + + str = @scanner.string[index_start, size] + str.gsub!("\n",'N') + str.gsub!("\t",'T') + + if index_end == @size + str += "EOF" + end + + pre_s = @scanner.pos-index_start + pre_s = [pre_s, 0].max + pre_s2 = [len-pre_s,0].max +# puts "pre_S = #{pre_s}" + pre =" "*(pre_s) + + "-"*len+"\n"+ + str + "\n" + + "-"*pre_s + "|" + "-"*(pre_s2)+"\n"+ +# pre + "|\n"+ + pre + "+--- Byte #{@scanner.pos}\n"+ + + "Shown bytes [#{index_start} to #{size}] of #{@size}:\n"+ + @scanner.string.gsub(/^/, ">") + +# "CharSource: At character #{@buffer_index} of block "+ +# " beginning with:\n #{@buffer[0,50].inspect} ...\n"+ +# " before: \n ...
#{cur_chars(50).inspect} ... " + end +end class CharSourceDebug def initialize(s, parent) diff --git a/vendor/plugins/maruku/lib/maruku/input/parse_span_better.rb b/vendor/plugins/maruku/lib/maruku/input/parse_span_better.rb index b9317b41..d0158366 100644 --- a/vendor/plugins/maruku/lib/maruku/input/parse_span_better.rb +++ b/vendor/plugins/maruku/lib/maruku/input/parse_span_better.rb @@ -23,7 +23,17 @@ require 'set' module MaRuKu; module In; module Markdown; module SpanLevelParser include MaRuKu::Helpers - + +# Concatenates to a string +class SpanContext_String; end + +# Pushes to an Array, and then calls #join to output +class SpanContext_Array; end + +# You choose... +#SpanContext = SpanContext_Array +SpanContext = SpanContext_String # Seems to be faster + EscapedCharInText = Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>] @@ -32,6 +42,8 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser EscapedCharInInlineCode = [?\\,?`] + IgnoreWikiLinks = MaRuKu::Globals[:ignore_wikilinks] + def parse_lines_as_span(lines, parent=nil) parse_span_better lines.join("\n"), parent end @@ -60,7 +72,7 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser # This is only an optimization which cuts 50% of the time used. # (but you can't use a-zA-z in exit_on_chars) if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z))) - con.cur_string << src.shift_char + con.push_char src.shift_char next end @@ -142,6 +154,9 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser when ?[ if markdown_extra? && src.next_char == ?^ read_footnote_ref(src,con) + elsif IgnoreWikiLinks && src.next_char == ?[ + con.push_char src.shift_char + con.push_char src.shift_char else read_link(src, con) end @@ -191,7 +206,7 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser # or 2) the last char was a space # or 3) the current string is empty #if con.elements.empty?
|| - if (con.cur_string =~ /\s\Z/) || (con.cur_string.size == 0) + if con.is_end? # also, we check the next characters follows = src.cur_chars(4) if follows =~ /^\_\_\_[^\s\_]/ @@ -678,16 +693,15 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser end # read link - class SpanContext + class SpanContext_Array include MaRuKu::Strings # Read elements attr_accessor :elements - attr_accessor :cur_string def initialize @elements = [] - @cur_string = "" + @cur_string_array = [] end def push_element(e) @@ -703,17 +717,86 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser def push_elements(a) for e in a if e.kind_of? String - e.each_byte do |b| push_char b end + @cur_string_array << e # e.each_byte do |b| push_char b end else push_element e end end end + def is_end? + @cur_string_array.empty? || @cur_string_array.last =~ /\s\Z/ + end + def push_string_if_present - if @cur_string.size > 0 + unless @cur_string_array.empty? + @elements << @cur_string_array.join + @cur_string_array = [] + end + nil + end + + def push_char(c) + @cur_string_array << c.chr + nil + end + + # push space into current string if + # there isn't one + def push_space + last = @cur_string_array.last + @cur_string_array << ' ' unless last =~ /\ \Z/ + end + + def describe + lines = @elements.map{|x| x.inspect}.join("\n") + s = "Elements read in span: \n" + + lines.gsub(/^/, ' -')+"\n" + + s += "Current string: \n #{@cur_string_array.join.inspect}\n" unless @cur_string_array.empty? + s + end + end # SpanContext_Array + + class SpanContext_String + include MaRuKu::Strings + + # Read elements + attr_accessor :elements + + def initialize + @elements = [] + @cur_string = '' + end + + def push_element(e) + raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} " if + not (e.kind_of?(String) or e.kind_of?(MDElement)) + + push_string_if_present + @elements << e + nil + end + alias push push_element + + def push_elements(a) + for e in a + if e.kind_of? 
String + @cur_string << e # e.each_byte do |b| push_char b end + else + push_element e + end + end + end + + def is_end? + @cur_string.empty? || @cur_string =~ /\s\Z/ + end + + def push_string_if_present + unless @cur_string.empty? @elements << @cur_string - @cur_string = "" + @cur_string = '' end nil end @@ -726,8 +809,8 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser # push space into current string if # there isn't one def push_space - last = @cur_string[@cur_string.size-1] - @cur_string << ?\ if last != ?\ + last = @cur_string[@cur_string.length - 1] + @cur_string << ' ' unless last == ?\ end def describe @@ -735,12 +818,11 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser s = "Elements read in span: \n" + lines.gsub(/^/, ' -')+"\n" - if @cur_string.size > 0 - s += "Current string: \n #{@cur_string.inspect}\n" - end + s += "Current string: \n #{@cur_string_array.join.inspect}\n" unless @cur_string_array.empty? s end - end # SpanContext + end # SpanContext_String + end end end end # module MaRuKu; module In; module Markdown; module SpanLevelParser