Bring up to current.
This commit is contained in:
parent
69b62b6f33
commit
b19e1e4f47
71 changed files with 8305 additions and 39 deletions
325
lib/maruku/input/charsource.rb
Normal file
325
lib/maruku/input/charsource.rb
Normal file
|
@ -0,0 +1,325 @@
|
|||
#--
|
||||
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
|
||||
#
|
||||
# This file is part of Maruku.
|
||||
#
|
||||
# Maruku is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Maruku is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Maruku; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#++
|
||||
|
||||
|
||||
module MaRuKu; module In; module Markdown; module SpanLevelParser
|
||||
|
||||
# a string scanner coded by me
|
||||
class CharSourceManual; end
|
||||
|
||||
# a wrapper around StringScanner
|
||||
class CharSourceStrscan; end
|
||||
|
||||
# A debug scanner that checks the correctness of both
|
||||
# by comparing their output
|
||||
class CharSourceDebug; end
|
||||
|
||||
# Choose!
|
||||
|
||||
CharSource = CharSourceManual # faster! 58ms vs. 65ms
|
||||
#CharSource = CharSourceStrscan
|
||||
#CharSource = CharSourceDebug
|
||||
|
||||
|
||||
# Hand-written character scanner over a String.
#
# The whole input lives in @buffer and @buffer_index is the cursor; every
# read below is relative to that cursor. An optional parent source is kept
# only so that #describe can append the parent's own description.
class CharSourceManual
  include MaRuKu::Strings

  # s::      the String to scan; any other class raises a RuntimeError
  # parent:: optional enclosing source, used by #describe only
  def initialize(s, parent=nil)
    raise "Passed #{s.class}" if not s.kind_of? String
    @buffer = s
    @buffer_index = 0
    @parent = parent
  end

  # Return the current char, or nil past the end of the buffer.
  # (String#[] yields an Integer on Ruby 1.8 and a String on 1.9+.)
  def cur_char; @buffer[@buffer_index] end

  # Return the next n chars as a String.
  def cur_chars(n); @buffer[@buffer_index,n] end

  # Return the char after the current one, or nil.
  def next_char; @buffer[@buffer_index+1] end

  # Return the current char and advance the cursor by one.
  def shift_char
    c = @buffer[@buffer_index]
    @buffer_index+=1
    c
  end

  # Advance the cursor by one; always returns nil.
  def ignore_char
    @buffer_index+=1
    nil
  end

  # Advance the cursor by n; always returns nil.
  def ignore_chars(n)
    @buffer_index+=n
    nil
  end

  # Everything from the cursor to the end of the buffer.
  def current_remaining_buffer
    @buffer[@buffer_index, @buffer.size-@buffer_index]
  end

  # True when the buffer continues with exactly +string+ at the cursor.
  def cur_chars_are(string)
    cur_chars(string.size) == string
  end

  # True when regexp +r+ matches exactly at the cursor. Nothing is consumed.
  def next_matches(r)
    # \A (not ^): in Ruby ^ matches at every line start, which would let
    # the prefix skip land on a later line of a multi-line buffer.
    r2 = /\A.{#{@buffer_index}}#{r}/m
    md = r2.match @buffer
    return !!md
  end

  # Match +r+ at the cursor against the whole buffer and consume what it
  # matched. Returns the MatchData (whose full match also contains the
  # already-consumed prefix) or nil.
  def read_regexp3(r)
    r2 = /\A.{#{@buffer_index}}#{r}/m
    m = r2.match @buffer
    if m
      consumed = m.to_s.size - @buffer_index
      ignore_chars consumed
    end
    m
  end

  # Match +r+ at the cursor and consume what it matched.
  # Returns the MatchData or nil; the cursor only moves on success.
  def read_regexp(r)
    # Anchor with \A so the match cannot start on a later line of the
    # remaining text while the cursor advances only by the match length.
    r2 = /\A#{r}/
    rest = current_remaining_buffer
    m = r2.match(rest)
    if m
      @buffer_index += m.to_s.size
    end
    return m
  end

  # Skip spaces and tabs (not newlines) starting at the cursor.
  def consume_whitespace
    while c = cur_char
      # ?\s and ?\t are both char literals, so the comparison is valid
      # whether chars are Integers (1.8) or Strings (1.9+).
      break unless c == ?\s || c == ?\t
      ignore_char
    end
  end

  # Append the run of ASCII letters at the cursor to +out+ and consume it.
  def read_text_chars(out)
    s = @buffer.size; c=nil
    while @buffer_index < s && (c=@buffer[@buffer_index]) &&
      ((c>=?a && c<=?z) || (c>=?A && c<=?Z))
      out << c
      @buffer_index += 1
    end
  end

  # Human-readable picture of the cursor position, followed by the
  # parent's description when a parent was given.
  def describe
    s = describe_pos(@buffer, @buffer_index)
    if @parent
      s += "\n\n" + @parent.describe
    end
    s
  end

  # Mixed in last, as in the original; #describe calls describe_pos.
  include SpanLevelParser
end
|
||||
|
||||
# Build a multi-line, human-readable picture of +buffer+ around
# +buffer_index+: a window of at most 75 chars (newlines shown as 'N',
# tabs as 'T', "EOF" appended when the window reaches the end), a marker
# line pointing at the index, and finally the whole buffer through add_tabs.
#
# buffer::       the String being scanned
# buffer_index:: cursor position inside +buffer+
# Returns a String.
def describe_pos(buffer, buffer_index)
  len = 75
  num_before_max = buffer_index
  num_after_max  = buffer.size-buffer_index

  # Start with half the window after the cursor, then let each side take
  # over the columns the other side cannot fill.
  num_after  = [len/2, num_after_max].min
  num_before = [num_before_max, len-num_after].min
  num_after  = [num_after_max, len-num_before].min

  index_start = [buffer_index - num_before, 0].max
  index_end   = [buffer_index + num_after, buffer.size].min
  size = index_end - index_start

  # buffer[...] returns a fresh String, so the in-place gsub! calls do not
  # touch the caller's buffer.
  str = buffer[index_start, size]
  str.gsub!("\n",'N')
  str.gsub!("\t",'T')
  str += "EOF" if index_end == buffer.size

  pre_s  = [buffer_index-index_start, 0].max
  pre_s2 = [len-pre_s,0].max
  pre = " "*(pre_s)

  "-"*len+"\n"+
  str + "\n" +
  "-"*pre_s + "|" + "-"*(pre_s2)+"\n"+
  pre + "+--- Byte #{buffer_index}\n"+
  # Report the end index of the window (the original printed its length).
  "Shown bytes [#{index_start} to #{index_end}] of #{buffer.size}:\n"+
  add_tabs(buffer,1,">")
end
|
||||
|
||||
|
||||
require 'strscan'
|
||||
|
||||
# Character scanner with the same interface as CharSourceManual, backed by
# the standard library's StringScanner.
class CharSourceStrscan
  include SpanLevelParser
  include MaRuKu::Strings

  # s::      the String to scan
  # parent:: optional enclosing source (accepted so that the two-argument
  #          CharSource.new(content, src) call works with this class too);
  #          used by #describe only
  def initialize(s, parent=nil)
    @s = StringScanner.new(s)
    @parent = parent
  end

  # Return the current char, or nil at the end of input.
  def cur_char
    @s.peek(1)[0]
  end

  # Return the next n chars as a String.
  def cur_chars(n);
    @s.peek(n)
  end

  # Return the char after the current one, or nil.
  def next_char;
    @s.peek(2)[1]
  end

  # Return the current char and advance; nil at the end of input
  # (get_byte returns nil there, which must not be indexed).
  def shift_char
    byte = @s.get_byte
    byte && byte[0]
  end

  # Advance by one; always returns nil.
  def ignore_char
    @s.get_byte
    nil
  end

  # Advance by n; always returns nil.
  def ignore_chars(n)
    n.times do @s.get_byte end
    nil
  end

  # Everything from the cursor to the end of input.
  def current_remaining_buffer
    @s.rest
  end

  # True when the input continues with exactly +string+ at the cursor.
  def cur_chars_are(string)
    cur_chars(string.size) == string
  end

  # True when regexp +r+ matches at the cursor. Nothing is consumed.
  def next_matches(r)
    len = @s.match?(r)
    return !!len
  end

  # Consume +r+ at the cursor; returns a MatchData or nil.
  # NOTE(review): the MatchData comes from re-matching +r+ against the
  # consumed text only, so a pattern relying on look-ahead past the match
  # may consume input and still return nil -- confirm no caller does that.
  def read_regexp(r)
    string = @s.scan(r)
    if string
      return r.match(string)
    else
      return nil
    end
  end

  # Skip spaces and tabs only, like CharSourceManual#consume_whitespace
  # (the previous /\s+/ also swallowed newlines). Always returns nil.
  def consume_whitespace
    @s.scan(/[ \t]+/)
    nil
  end

  # Append the run of ASCII letters at the cursor to +out+ and consume it
  # (counterpart of CharSourceManual#read_text_chars). Returns nil.
  def read_text_chars(out)
    text = @s.scan(/[a-zA-Z]+/)
    out << text if text
    nil
  end

  # Human-readable picture of the cursor position, followed by the
  # parent's description when a parent was given.
  def describe
    s = describe_pos(@s.string, @s.pos)
    s += "\n\n" + @parent.describe if @parent
    s
  end

end
|
||||
|
||||
|
||||
# Debugging scanner: drives a CharSourceManual and a CharSourceStrscan in
# lock-step, forwards every call to both, and terminates the process with a
# report as soon as their results or their current chars disagree.
class CharSourceDebug
  # s::      the String to scan
  # parent:: optional enclosing source; accepted so that the two-argument
  #          CharSource.new(content, src) call also works with this class.
  #          Only handed to the manual scanner.
  def initialize(s, parent=nil)
    @a = CharSourceManual.new(s, parent)
    @b = CharSourceStrscan.new(s)
  end

  # Forward +methodname+ to both scanners and compare the outcomes.
  # Returns the manual scanner's result when both agree; otherwise prints
  # the discrepancy and calls exit.
  def method_missing(methodname, *args)
    # Positions before the call, for the mismatch report.
    a_bef = @a.describe
    b_bef = @b.describe

    a = @a.send(methodname, *args)
    b = @b.send(methodname, *args)

    if a.kind_of? MatchData
      # MatchData objects are compared through their capture arrays.
      if a.to_a != b.to_a
        puts "called: #{methodname}(#{args})"
        puts "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}"
        puts "AFTER: "+@a.describe
        puts "AFTER: "+@b.describe
        puts "BEFORE: "+a_bef
        puts "BEFORE: "+b_bef
        puts caller.join("\n")
        exit
      end
    elsif a != b
      puts "called: #{methodname}(#{args})"
      puts "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}"
      puts ""+@a.describe
      puts ""+@b.describe
      puts caller.join("\n")
      exit
    end

    # Equal return values are not enough: the cursors must agree too.
    if @a.cur_char != @b.cur_char
      puts "Fuori sincronia dopo #{methodname}(#{args})"
      puts ""+@a.describe
      puts ""+@b.describe
      exit
    end

    return a
  end

  # Keep respond_to? truthful for the calls method_missing forwards.
  def respond_to_missing?(methodname, include_private = false)
    @a.respond_to?(methodname, include_private) || super
  end
end
|
||||
|
||||
end end end end
|
68
lib/maruku/input/extensions.rb
Normal file
68
lib/maruku/input/extensions.rb
Normal file
|
@ -0,0 +1,68 @@
|
|||
module MaRuKu; module In; module Markdown
|
||||
|
||||
|
||||
# Hash Fixnum -> name
|
||||
SpanExtensionsTrigger = {}
|
||||
|
||||
|
||||
# Plain record describing one registered span-level extension.
class SpanExtension
  # chars::  the triggering characters
  # regexp:: the triggering regexp
  # block::  the lambda invoked for the extension
  attr_accessor :chars, :regexp, :block
end
|
||||
|
||||
# Hash String -> Extension
|
||||
SpanExtensions = {}
|
||||
|
||||
# Try the span extensions registered for the char at the cursor of +src+.
#
# Each extension that has a regexp matching at the cursor gets its block
# called with (doc, src, con); the first block returning a true value makes
# this method return true. Returns false when nothing handled the input.
def check_span_extensions(src, con)
  candidates = SpanExtensionsTrigger[src.cur_char]
  return false unless candidates

  candidates.each do |ext|
    next unless ext.regexp && src.next_matches(ext.regexp)
    return true if ext.block.call(doc, src, con)
  end

  false # not special
end
|
||||
|
||||
# Register a span-level extension.
#
# args::  Hash with :chars (one triggering char or an array of them) and
#         :regexp (the triggering pattern)
# block:: stored as the extension's block
#
# The new SpanExtension is appended to SpanExtensionsTrigger under each of
# its chars. Returns the array of chars.
def self.register_span_extension(args, &block)
  extension = SpanExtension.new
  extension.chars  = [*args[:chars]]
  extension.regexp = args[:regexp]
  extension.block  = block

  extension.chars.each do |trigger|
    SpanExtensionsTrigger[trigger] ||= []
    SpanExtensionsTrigger[trigger] << extension
  end
end
|
||||
|
||||
# Register a block-level extension: stores +block+ in BlockExtensions under
# the pattern given as args[:regexp]. Returns the stored block.
def self.register_block_extension(args, &block)
  BlockExtensions[args[:regexp]] = block
end
|
||||
|
||||
# Hash Regexp -> Block
|
||||
BlockExtensions = {}
|
||||
|
||||
# Offer +line+ to every registered block extension.
#
# For each pattern in BlockExtensions that matches +line+, the associated
# block is called with (doc, src, con); the first one returning a true
# value makes this method return true. Returns false otherwise.
def check_block_extensions(src, con, line)
  BlockExtensions.each do |pattern, handler|
    next unless pattern.match(line)
    return true if handler.call(doc, src, con)
  end

  false # not special
end
|
||||
|
||||
# Returns the MatchData of the first registered block-extension pattern
# that matches +line+, or false when none does.
def any_matching_block_extension?(line)
  BlockExtensions.keys.each do |pattern|
    found = pattern.match(line)
    return found if found
  end

  false
end
|
||||
|
||||
end end end
|
144
lib/maruku/input/html_helper.rb
Normal file
144
lib/maruku/input/html_helper.rb
Normal file
|
@ -0,0 +1,144 @@
|
|||
#--
|
||||
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
|
||||
#
|
||||
# This file is part of Maruku.
|
||||
#
|
||||
# Maruku is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Maruku is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Maruku; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#++
|
||||
|
||||
|
||||
module MaRuKu; module In; module Markdown; module SpanLevelParser
|
||||
|
||||
# This class helps me read and sanitize HTML blocks
|
||||
|
||||
# I tried to do this with REXML, but wasn't able to. (suggestions?)
|
||||
|
||||
# Incremental reader for raw HTML blocks.
#
# Text is fed in with #eat_this. The reader copies what it has consumed to
# an internal buffer (see #stuff_you_read), keeps a stack of open tags, and
# tracks whether it is inside a comment. #is_finished? becomes true once
# every opened tag has been closed and no comment is open. Unconsumed text
# stays available through #rest.
class HTMLHelper
  include MaRuKu::Strings

  # All patterns are anchored with \A: they are applied to the start of the
  # unread text, and ^ would also match at the start of any later line,
  # letting a match skip over unread tags.
  Tag = %r{\A<(/)?(\w+)\s*([^>]*)>}m
  EverythingElse = %r{\A[^<]+}m
  CommentStart = %r{\A<!--}x
  # Lazy and multi-line: stop at the first "-->", wherever it is.
  CommentEnd = %r{\A.*?-->}m
  # Tags always re-emitted in self-closing form and never put on the stack.
  TO_SANITIZE = ['img','hr']

  attr_reader :rest

  def initialize
    @rest = ""
    @tag_stack = []
    @m = nil
    @already = ""
    @inside_comment = false
  end

  # Consume +line+. Stops early, possibly leaving text in #rest, as soon as
  # at least one tag or comment was read and the reader is finished.
  # Raises (through #error) on mismatched closing tags or text that cannot
  # be read.
  def eat_this(line)
    # NOTE(review): the new text is put *before* any leftover @rest, as in
    # the original -- confirm this is intended when @rest is not empty.
    @rest = line + @rest
    things_read = 0
    until @rest.empty?
      if @inside_comment
        if @m = CommentEnd.match(@rest)
          @inside_comment = false
          @already += @m.to_s
          @rest = @m.post_match
        else
          # No "-->" yet: the comment continues in the next chunk. Take
          # everything, otherwise text starting with "<" would never be
          # consumed and this loop would not terminate.
          @already += @rest
          @rest = ""
        end
      else
        if @m = CommentStart.match(@rest)
          things_read += 1
          @inside_comment = true
          @already += @m.to_s
          @rest = @m.post_match
        elsif @m = Tag.match(@rest)
          things_read += 1
          @rest = @m.post_match
          handle_tag
        elsif @m = EverythingElse.match(@rest)
          @already += @m.to_s
          @rest = @m.post_match
        else
          error "Malformed HTML: not complete: #{@rest.inspect}"
        end
      end # not inside comment

      break if is_finished? and things_read>0
    end
  end

  # Raise an Exception carrying +s+ plus a dump of the reader state.
  def error(s)
    raise Exception, "Error: #{s} \n"+ inspect, caller
  end

  def inspect; "HTML READER\n comment=#{@inside_comment} "+
    "match=#{@m.to_s.inspect}\n"+
    "Tag stack = #{@tag_stack.inspect} \n"+
    "Before:\n"+
    add_tabs(@already,1,'|')+"\n"+
    "After:\n"+
    add_tabs(@rest,1,'|')+"\n"
  end

  # The text consumed (and, for TO_SANITIZE tags, rewritten) so far.
  def stuff_you_read
    @already
  end

  # True when no comment is open and the tag stack is empty.
  def is_finished?
    not @inside_comment and @tag_stack.empty?
  end

  private

  # Process the tag currently held in @m: copy it to the output and update
  # the tag stack.
  def handle_tag
    is_closing = !!@m[1]
    tag = @m[2]
    attributes = @m[3]

    # A trailing "/" in the attribute text marks a self-closing tag.
    is_single = false
    if attributes =~ /\A(.*)\/\Z/
      attributes = $1
      is_single = true
    end

    if TO_SANITIZE.include? tag
      attributes.strip!
      if attributes.size > 0
        @already += '<%s %s />' % [tag, attributes]
      else
        @already += '<%s />' % [tag]
      end
    elsif is_closing
      @already += @m.to_s
      if @tag_stack.empty?
        error "Malformed: closing tag #{tag.inspect} "+
          "in empty list"
      end
      if @tag_stack.last != tag
        error "Malformed: tag <#{tag}> "+
          "closes <#{@tag_stack.last}>"
      end
      @tag_stack.pop
    else
      # Opening or self-closing tag: always copy it through (self-closing
      # tags used to be silently dropped); only real openers are stacked.
      @already += @m.to_s
      @tag_stack.push tag unless is_single
    end
  end
end # html helper
|
||||
|
||||
end end end end
|
111
lib/maruku/input/linesource.rb
Normal file
111
lib/maruku/input/linesource.rb
Normal file
|
@ -0,0 +1,111 @@
|
|||
#--
|
||||
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
|
||||
#
|
||||
# This file is part of Maruku.
|
||||
#
|
||||
# Maruku is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Maruku is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Maruku; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#++
|
||||
|
||||
|
||||
module MaRuKu; module In; module Markdown; module BlockLevelParser
|
||||
|
||||
# This represents a source of lines that can be consumed.
|
||||
#
|
||||
# It is the twin of CharSource.
|
||||
#
|
||||
|
||||
# A consumable sequence of lines with a cursor (@lines_index).
#
# A source may be nested in a parent source; +parent_offset+ is the index,
# inside the parent, that is added when computing original line numbers.
class LineSource
  include MaRuKu::Strings

  # lines::         the lines to serve (must not be nil)
  # parent::        optional enclosing LineSource
  # parent_offset:: index in +parent+ used by #original_line_number
  def initialize(lines, parent=nil, parent_offset=nil)
    raise "NIL lines? " if not lines
    @lines = lines
    @lines_index = 0
    @parent = parent
    @parent_offset = parent_offset
  end

  # Line at the cursor, or nil when the source is exhausted.
  def cur_line() @lines[@lines_index] end

  # Line after the cursor, or nil.
  def next_line() @lines[@lines_index+1] end

  # Return the current line and advance; raises when already exhausted.
  def shift_line()
    raise "Over the rainbow" if @lines_index >= @lines.size
    l = @lines[@lines_index]
    @lines_index += 1
    return l
  end

  # Advance without returning the line; raises when already exhausted.
  def ignore_line
    raise "Over the rainbow" if @lines_index >= @lines.size
    @lines_index += 1
  end

  # Human-readable context: the original line number followed by up to
  # three lines on either side of the cursor, each with its md_type.
  def describe
    s = "At line #{original_line_number(@lines_index)}\n"

    context = 3 # lines
    from = [@lines_index-context, 0].max
    to   = [@lines_index+context, @lines.size-1].min

    for i in from..to
      prefix = (i == @lines_index) ? '--> ' : ' ';
      l = @lines[i]
      # The line goes in as a format *argument*: interpolated into the
      # format string, any "%" in the line would be read as a directive.
      s += "%10s %4s|%s" %
        [@lines[i].md_type.to_s, prefix, l]

      s += "|\n"
    end

    s
  end

  # Line number of +index+; with a parent, +index+ is added to the
  # parent's number for @parent_offset, otherwise numbering starts at 1.
  def original_line_number(index)
    if @parent
      return index + @parent.original_line_number(@parent_offset)
    else
      1 + index
    end
  end

  # Current cursor position.
  def cur_index
    @lines_index
  end

  # Returns the types of the upcoming lines as a string of one letter per
  # line ("t" text, "e" empty, "d" definition, "o" other); stops after the
  # first definition or after the second empty line.
  def tell_me_the_future
    s = ""; num_e = 0;
    for i in @lines_index..@lines.size-1
      c = case @lines[i].md_type
        when :text; "t"
        when :empty; num_e+=1; "e"
        when :definition; "d"
        else "o"
      end
      s += c
      break if c == "d" or num_e>1
    end
    s
  end

end # linesource
|
||||
|
||||
end end end end # block
|
||||
|
594
lib/maruku/input/parse_block.rb
Normal file
594
lib/maruku/input/parse_block.rb
Normal file
|
@ -0,0 +1,594 @@
|
|||
#--
|
||||
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
|
||||
#
|
||||
# This file is part of Maruku.
|
||||
#
|
||||
# Maruku is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Maruku is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Maruku; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#++
|
||||
|
||||
|
||||
module MaRuKu; module In; module Markdown; module BlockLevelParser
|
||||
|
||||
include Helpers
|
||||
include MaRuKu::Strings
|
||||
include MaRuKu::In::Markdown::SpanLevelParser
|
||||
|
||||
# Array of parsed blocks that can summarise its own tail for diagnostics.
class BlockContext < Array
  # Returns a String listing the inspect form of (at most) the last five
  # elements, one per line.
  def describe
    n = 5
    tail = last(n)
    listing = tail.map { |element| "\n -" + element.inspect }
    "Last #{n} elements: " + listing.join
  end
end
|
||||
|
||||
# Splits +text+ into lines, wraps them in a LineSource and returns what
# parse_blocks produces for that source.
def parse_text_as_markdown(text)
  parse_blocks(LineSource.new(split_lines(text)))
end
|
||||
|
||||
# Parse every line of +src+ (a LineSource) into block-level elements.
#
# Lines are dispatched on their md_type to the matching read_* method;
# registered block extensions get the first chance at every line. After the
# scan, inline attribute lists are merged, :ial elements and :empty markers
# are removed, and lists / definition lists whose items do not want a
# paragraph have the wrapping paragraph unwrapped.
#
# Returns a BlockContext.
def parse_blocks(src)
  output = BlockContext.new

  # run state machine
  while src.cur_line

    next if check_block_extensions(src, output, src.cur_line)

    case src.cur_line.md_type
    when :empty
      output.push :empty
      src.ignore_line
    when :ial
      m = InlineAttributeList.match src.shift_line
      content = m[1] || ""
      src2 = CharSource.new(content, src)
      interpret_extension(src2, output, [nil])
    when :ald
      output.push read_ald(src)
    when :text
      if src.cur_line =~ MightBeTableHeader &&
         (src.next_line && src.next_line =~ TableSeparator)
        output.push read_table(src)
      elsif [:header1, :header2].include? src.next_line.md_type
        output.push read_header12(src)
      elsif eventually_comes_a_def_list(src)
        definition = read_definition(src)
        # extend the definition list we are already building, if any
        if output.last.kind_of?(MDElement) &&
           output.last.node_type == :definition_list
          output.last.children << definition
        else
          output.push md_el(:definition_list, [definition])
        end
      else # Start of a paragraph
        output.push read_paragraph(src)
      end
    when :header2, :hrule
      # a header underline with nothing above it is a horizontal rule
      src.shift_line
      output.push md_hrule()
    when :header3
      output.push read_header3(src)
    when :ulist, :olist
      list_type = src.cur_line.md_type == :ulist ? :ul : :ol
      li = read_list_item(src)
      # append to current list if we have one
      if output.last.kind_of?(MDElement) &&
         output.last.node_type == list_type
        output.last.children << li
      else
        output.push md_el(list_type, [li])
      end
    when :quote
      output.push read_quote(src)
    when :code
      e = read_code(src)
      output << e if e
    when :raw_html
      e = read_raw_html(src)
      output << e if e
    when :footnote_text
      output.push read_footnote_text(src)
    when :ref_definition
      output.push read_ref_definition(src)
    when :abbreviation
      output.push read_abbreviation(src)
    when :xml_instr
      read_xml_instruction(src, output)
    when :metadata
      maruku_error "Please use the new meta-data syntax: \n"+
        " http://maruku.rubyforge.org/proposal.html\n", src
      src.ignore_line
    else # warn if we forgot something
      md_type = src.cur_line.md_type
      line = src.cur_line
      maruku_error "Ignoring line '#{line}' type = #{md_type}", src
      src.shift_line
    end
  end

  merge_ial(output, src, output)
  output.delete_if { |x| x.kind_of?(MDElement) && x.node_type == :ial }

  # get rid of empty line markers
  output.delete_if { |x| x == :empty }

  # See for each list if we can omit the paragraphs and use li_span
  # TODO: do this after
  output.each do |c|
    case c.node_type
    when :ul, :ol
      # Remove paragraphs that we can get rid of
      if c.children.all? { |li| !li.want_my_paragraph }
        c.children.each do |item|
          item.node_type = :li_span
          item.children = item.children[0].children
        end
      end
    when :definition_list
      if c.children.all? { |defi| !defi.want_my_paragraph }
        c.children.each do |definition|
          definition.definitions.each do |dd|
            dd.children = dd.children[0].children
          end
        end
      end
    end
  end

  output
end
|
||||
|
||||
|
||||
|
||||
# Read one attribute-definition-list line from +src+.
#
# On a match against AttributeDefinitionList the parsed attribute list is
# stored in self.ald under its id and an md_ald element is returned. When
# the line does not match, an error is reported and nil is returned.
def read_ald(src)
  line = src.shift_line
  unless line =~ AttributeDefinitionList
    maruku_error "Bug Bug:\n#{line.inspect}"
    return nil
  end

  id, raw_attributes = $1, $2
  al = read_attribute_list(CharSource.new(raw_attributes, src), nil, [nil])
  self.ald[id] = al
  md_ald(id, al)
end
|
||||
|
||||
# Reads a two-line header: a text line followed by an underline line
# (----- or ========).
#
# With the new meta-data syntax enabled, a trailing "{...}" on the text
# line is parsed as an inline attribute list. The level is 2 when the
# underline line has md_type :header2 and 1 otherwise.
def read_header12(src)
  line = src.shift_line.strip
  al = nil

  # Check if there is an IAL
  if new_meta_data? && line =~ /^(.*)\{(.*)\}\s*$/
    heading, ial = $1, $2
    line = heading.strip
    al = read_attribute_list(CharSource.new(ial, src), nil, [nil])
  end

  text = parse_lines_as_span [line]
  level = (src.cur_line.md_type == :header2) ? 2 : 1
  src.shift_line # drop the underline
  md_header(level, text, al)
end
|
||||
|
||||
# Reads a one-line header like '#### header ####'.
#
# With the new meta-data syntax enabled, a trailing "{...}" is parsed as an
# inline attribute list. The level comes from num_leading_hashes and the
# text from strip_hashes.
def read_header3(src)
  line = src.shift_line.strip
  al = nil

  # Check if there is an IAL
  if new_meta_data? && line =~ /^(.*)\{(.*)\}\s*$/
    heading, ial = $1, $2
    line = heading.strip
    al = read_attribute_list(CharSource.new(ial, src), nil, [nil])
  end

  level = num_leading_hashes(line)
  text = parse_lines_as_span [strip_hashes(line)]
  md_header(level, text, al)
end
|
||||
|
||||
# Read a processing instruction ("<?target ... ?>"), possibly spanning
# several lines, and push the result onto +output+.
#
# src::    line source positioned on the line that opens the instruction
# output:: array receiving the resulting element(s)
#
# A target of 'mrk' with MaRuKu::Globals[:unsafe_features] enabled has its
# code run through safe_execute_code and the (non-String) result pushed;
# anything else becomes an md_xml_instr element. Reports an error for text
# after the closing "?>" and for an instruction that is never closed.
def read_xml_instruction(src, output)
  m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
  raise "BugBug" if not m
  target = m[2] || ''
  code = m[3]

  terminated = true
  until code =~ /\?>/
    # Input ended before "?>": report it instead of running past the end
    # of the source; the text read so far is kept as the code.
    if src.cur_line.nil?
      terminated = false
      maruku_error "Unterminated processing instruction (missing '?>'):\n"+
        add_tabs(code, 1, '|'), src
      break
    end
    code += "\n"+src.shift_line
  end

  if terminated and not code =~ (/\?>\s*$/)
    garbage = (/\?>(.*)$/.match(code))[1]
    maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n"+
      add_tabs(code, 1, '|'), src
  end
  code.gsub!(/\?>\s*$/, '')

  if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
    result = safe_execute_code(self, code)
    if result
      if result.kind_of? String
        raise "Not expected"
      else
        output.push(*result)
      end
    end
  else
    output.push md_xml_instr(target, code)
  end
end
|
||||
|
||||
# Read a block of raw HTML from +src+.
#
# Lines are fed to an HTMLHelper (each one after the first prefixed with a
# newline) until the helper reports it is finished or the source runs out.
# Any exception raised meanwhile is reported through maruku_error. Returns
# an md_html element built from whatever the helper read.
def read_raw_html(src)
  helper = HTMLHelper.new
  begin
    helper.eat_this(src.shift_line)
    while src.cur_line && !helper.is_finished?
      helper.eat_this "\n" + src.shift_line
    end
  rescue Exception => e
    # Exception (not StandardError) on purpose: the rescue has to catch
    # everything the original caught.
    ex = e.inspect + e.backtrace.join("\n")
    maruku_error "Bad block-level HTML:\n#{add_tabs(ex,1,'|')}\n", src
  end
  md_html(helper.stuff_you_read)
end
|
||||
|
||||
# Collect the lines of one paragraph from +src+ and return an md_par
# element holding their span-level parse.
#
# Collection stops (without consuming the line) at the first line that
# * has one of the block-starting types listed below,
# * is a list line followed by another line of the same list type,
# * is blank once stripped,
# * is followed by a header underline, or
# * matches a registered block extension.
def read_paragraph(src)
  stoppers = [:quote, :header3, :empty, :raw_html, :ref_definition, :ial, :xml_instr]
  collected = []

  while (line = src.cur_line)
    kind = line.md_type
    break if stoppers.include?(kind)
    # a single list-looking line does not break the paragraph
    break if (kind == :olist || kind == :ulist) && src.next_line.md_type == kind
    break if line.strip.size == 0
    break if [:header1, :header2].include?(src.next_line.md_type)
    break if any_matching_block_extension?(line)

    collected << src.shift_line
  end

  md_par(parse_lines_as_span(collected, src))
end
|
||||
|
||||
# Reads one list item, either ordered or unordered, and returns an md_li
# element.
#
# The first line (with its marker removed, using the indentation reported
# by spaces_before_first_char) and the following indented content are
# parsed as nested blocks. The item is flagged as wanting a paragraph when
# read_indented_content says so or when it has more than one child.
def read_list_item(src)
  parent_offset = src.cur_index

  item_type  = src.cur_line.md_type
  first_line = src.shift_line

  # Ugly things going on inside `read_indented_content`
  indentation = spaces_before_first_char(first_line)
  body, want_my_paragraph =
    read_indented_content(src, indentation, [:ulist, :olist, :ial], item_type)

  # Put the first line, without its '*', '-', '+' marker, back in front
  body.unshift first_line[indentation, first_line.size-1]

  nested = LineSource.new(body, src, parent_offset)
  children = parse_blocks(nested)
  with_par = want_my_paragraph || (children.size>1)

  md_li(children, with_par)
end
|
||||
|
||||
# Read one abbreviation definition line from +src+.
#
# The captures of the Abbreviation pattern give the abbreviation and its
# description; the pair is stored in self.abbreviations and returned as an
# md_abbr_def element. Errors are reported when the line does not match or
# the abbreviation is empty.
def read_abbreviation(src)
  line = src.shift_line
  unless line =~ Abbreviation
    maruku_error "Bug: it's Andrea's fault. Tell him.\n#{line.inspect}"
  end

  abbr, desc = $1, $2

  if !abbr || abbr.size == 0
    maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
  end

  self.abbreviations[abbr] = desc

  md_abbr_def(abbr, desc)
end
|
||||
|
||||
# Read one footnote definition (first line plus indented continuation)
# from +src+.
#
# The captures of FootnoteText give the footnote id and the text on the
# first line. The content is parsed as nested blocks, wrapped in an
# md_footnote element, stored in self.footnotes under the id and returned.
def read_footnote_text(src)
  parent_offset = src.cur_index

  first = src.shift_line
  maruku_error "Bug (it's Andrea's fault)" unless first =~ FootnoteText

  id, text = $1, $2

  # Ugly things going on inside `read_indented_content`
  indentation = 4
  lines, want_my_paragraph =
    read_indented_content(src, indentation, [:footnote_text], :footnote_text)

  # add first line, unless it carries no text
  lines.unshift text if text && text.strip != ""

  nested = LineSource.new(lines, src, parent_offset)
  footnote = md_footnote(id, parse_blocks(nested))
  self.footnotes[id] = footnote
  footnote
end
|
||||
|
||||
|
||||
# Collects the lines that belong to an indented construct (list item,
# description, footnote text), removing +indentation+ from each.
#
# src::         the line source, positioned after the construct's first line
# indentation:: number of leading spaces to strip
# break_list::  md_types that end the construct while no empty line has
#               been seen yet
# item_type::   md_type of the construct itself
#
# Returns [lines, want_my_paragraph]. Trailing empty lines are removed from
# +lines+. +want_my_paragraph+ is truthy when content was accepted after an
# empty line, or when an empty line was seen and the line left at the
# cursor has type +item_type+.
def read_indented_content(src, indentation, break_list, item_type)
  lines = []
  saw_empty = false
  saw_anything_after = false

  while src.cur_line
    # empty lines are always collected; they may still be trimmed below
    if src.cur_line.md_type == :empty
      saw_empty = true
      lines << src.shift_line
      next
    end

    if saw_empty
      # after a white line we expect things to be properly aligned
      break if number_of_leading_spaces(src.cur_line) < indentation
      saw_anything_after = true
    elsif break_list.include? src.cur_line.md_type
      break
    end

    stripped = strip_indent(src.shift_line, indentation)
    lines << stripped

    # You are only required to indent the first line of
    # a child paragraph.
    next unless stripped.md_type == :text
    while src.cur_line && (src.cur_line.md_type == :text)
      lines << strip_indent(src.shift_line, indentation)
    end
  end

  want_my_paragraph = saw_anything_after ||
    (saw_empty && (src.cur_line && (src.cur_line.md_type == item_type)))

  # drop the empty lines collected at the end
  lines.pop while lines.last && (lines.last.md_type == :empty)

  return lines, want_my_paragraph
end
|
||||
|
||||
|
||||
# Read a run of consecutive :quote lines from +src+, pass each through
# unquote, parse the result as nested blocks and return an md_quote
# element.
def read_quote(src)
  parent_offset = src.cur_index

  quoted = []
  quoted << unquote(src.shift_line) while src.cur_line && src.cur_line.md_type == :quote

  nested = LineSource.new(quoted, src, parent_offset)
  md_quote(parse_blocks(nested))
end
|
||||
|
||||
# Read an indented code block from +src+.
#
# Consecutive :code and :empty lines are taken with four columns of indent
# stripped; blank lines at both ends are discarded. Returns an md_codeblock
# element for the lines joined by newlines, or nil when nothing but blank
# lines was read.
def read_code(src)
  collected = []
  while src.cur_line && [:code, :empty].include?(src.cur_line.md_type)
    collected << strip_indent(src.shift_line, 4)
  end

  # trim blank lines at the end, then at the beginning
  collected.pop   while collected.last  && collected.last.strip.size == 0
  collected.shift while collected.first && collected.first.strip.size == 0

  return nil if collected.empty?

  md_codeblock(collected.join("\n"))
end
|
||||
|
||||
# Reads a series of metadata lines with empty lines in between.
#
# Empty lines are skipped and every :metadata line is run through
# parse_metadata and merged into the result; reading stops at the first
# line of any other type. Returns the merged Hash.
def read_metadata(src)
  collected = {}
  while src.cur_line
    kind = src.cur_line.md_type
    if kind == :empty
      src.shift_line
    elsif kind == :metadata
      collected.merge! parse_metadata(src.shift_line)
    else
      break
    end
  end
  collected
end
|
||||
|
||||
|
||||
# Reads a link reference definition and registers it in self.refs.
#
# The definition may continue on the following line when that line is not
# itself a reference definition and is indented by 1 to 3 spaces.
# Trailing `key=value` couples (match group 4) are stored in the same
# hash as :url and :title.
#
# Returns the element built by md_ref_def.
def read_ref_definition(src)
  line = src.shift_line

  # if link is incomplete, shift next line
  continues = src.cur_line &&
    (src.cur_line.md_type != :ref_definition) &&
    [1,2,3].include?(number_of_leading_spaces(src.cur_line))
  line += " "+ src.shift_line if continues

  match = LinkRegex.match(line)
  error "Link does not respect format: '#{line}'" unless match

  id    = match[1].strip.downcase
  url   = match[2]
  title = match[3]

  attributes = self.refs[id] = {:url=>url,:title=>title}

  extra = match[4]
  if extra
    extra.split.each do |pair|
      key, value = pair.split('=')
      value ||= ""
      # strip surrounding double quotes
      value = value[1, value.size-2] if value[0,1]=='"'
      attributes[key.to_sym] = value
    end
  end

  md_ref_def(id, url, {:title=>title})
end
|
||||
|
||||
# Splits one table line on '|' and returns the stripped, non-blank cells.
def split_cells(s)
  s.strip.split('|').map { |x| x.strip }.reject { |x| x.size == 0 }
end

# Reads a table: a header line, a separator line, then every following
# line that contains a '|'.
#
# Column alignment comes from the separator cells matched against Sep:
# both capture groups set means :center, only the second means :right,
# otherwise :left.
#
# Returns a :table element whose children are the head cells followed by
# all row cells, with the alignment list stored under :align. If the header
# or any row does not have as many cells as the separator line, an error is
# reported and md_br() is returned instead.
def read_table(src)
  head = split_cells(src.shift_line).map { |s|
    md_el(:head_cell, parse_lines_as_span([s]))
  }

  separator = split_cells(src.shift_line)

  align = separator.map do |s|
    m = Sep.match(s)
    if m && m[1] && m[2]
      :center
    elsif m && m[2]
      :right
    else
      :left
    end
  end

  num_columns = align.size

  if head.size != num_columns
    maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
    tell_user "I will ignore this table."
    # XXX try to recover
    return md_br()
  end

  rows = []

  while src.cur_line && src.cur_line =~ /\|/
    row = split_cells(src.shift_line).map { |s|
      md_el(:cell, parse_lines_as_span([s]))
    }
    # validate the row itself (the original re-checked the header here)
    if row.size != num_columns
      maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}"
      tell_user "I will ignore this table."
      # XXX try to recover
      return md_br()
    end
    rows << row
  end

  children = (head+rows).flatten
  md_el(:table, children, {:align => align})
end
|
||||
|
||||
# Tells whether a definition list starts at the current line.
#
# The summary string returned by src.tell_me_the_future is matched against
# the pattern: one or more `t`, an optional `e`, then a `d`.
#
# Returns the match position (truthy) or nil.
def eventually_comes_a_def_list(src)
  src.tell_me_the_future =~ %r{^t+e?d}x
end
|
||||
|
||||
|
||||
# Reads one definition-list entry: one or more term lines, an optional
# empty line, then one or more definitions.
#
# Each definition's first line is reduced to capture group 1 of the
# Definition regexp; its continuation lines come from
# read_indented_content and the whole is parsed as nested blocks.
#
# Returns a :definition element whose children are the terms followed by
# the definitions, with :terms, :definitions and :want_my_paragraph in its
# metadata. :want_my_paragraph is true when an empty line separated the
# terms from the definitions or read_indented_content asked for it.
#
# Raises RuntimeError when the terms are not followed by a definition,
# including when the input ends right after the optional empty line.
def read_definition(src)
  # Read one or more terms
  terms = []
  while src.cur_line && src.cur_line.md_type == :text
    terms << md_el(:definition_term, parse_lines_as_span([src.shift_line]))
  end

  want_my_paragraph = false

  raise "Chunky Bacon!" if not src.cur_line

  # one optional empty
  if src.cur_line.md_type == :empty
    want_my_paragraph = true
    src.shift_line
  end

  # cur_line may have become nil if the empty line was the last one
  raise "Chunky Bacon!" if src.cur_line.nil? || src.cur_line.md_type != :definition

  # Read one or more definitions
  definitions = []
  while src.cur_line && src.cur_line.md_type == :definition
    parent_offset = src.cur_index

    first = src.shift_line
    first =~ Definition
    first = $1

    lines, w_m_p =
      read_indented_content(src,4, [:definition], :definition)
    want_my_paragraph ||= w_m_p

    lines.unshift first

    src2 = LineSource.new(lines, src, parent_offset)
    children = parse_blocks(src2)
    definitions << md_el(:definition_data, children)
  end

  md_el(:definition, terms+definitions, {
    :terms => terms,
    :definitions => definitions,
    :want_my_paragraph => want_my_paragraph})
end
|
||||
end # BlockLevelParser
|
||||
end # MaRuKu
|
||||
end
|
||||
end
|
225
lib/maruku/input/parse_doc.rb
Normal file
225
lib/maruku/input/parse_doc.rb
Normal file
|
@ -0,0 +1,225 @@
|
|||
#--
|
||||
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
|
||||
#
|
||||
# This file is part of Maruku.
|
||||
#
|
||||
# Maruku is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Maruku is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Maruku; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#++
|
||||
|
||||
|
||||
require 'iconv'
|
||||
|
||||
|
||||
module MaRuKu; module In; module Markdown; module BlockLevelParser
|
||||
|
||||
# Parses a whole document from the string s into this element.
#
# Steps, in order: split off the email-style headers and merge them into
# the document attributes; convert the body to UTF-8 when an `encoding`
# attribute names another encoding; parse the body as Markdown; resolve
# abbreviations and markdown inside raw HTML; build the table of contents
# (using its header as the title when none was given); expand attribute
# lists on every element; finally, when the global :unsafe_features flag is
# set, execute the embedded code blocks.
def parse_doc(s)
  headers = parse_email_headers(s)
  data = headers[:data]
  headers.delete :data

  self.attributes.merge! headers

=begin maruku_doc
Attribute: encoding
Scope: document
Summary: Encoding for the document.

If the `encoding` attribute is specified, then the content
will be converted from the specified encoding to UTF-8.

Conversion happens using the `iconv` library.
=end

  enc = self.attributes[:encoding]
  self.attributes.delete :encoding
  if enc && enc.downcase != 'utf-8'
    data = Iconv.new('utf-8', enc).iconv(data)
  end

  @children = parse_text_as_markdown(data)

  if true #markdown_extra?
    self.search_abbreviations
    self.substitute_markdown_inside_raw_html
  end

  toc = create_toc

  # use the first header as title if not set
  if !self.attributes[:title] && toc.header_element
    self.attributes[:title] = toc.header_element.to_s
  end

  # save for later use
  self.toc = toc

  # Now do the attributes magic
  each_element do |e|
    # default attribute list for this node type, then the element's own
    defaults = self.ald[e.node_type.to_s]
    expand_attribute_list(defaults, e.attributes) if defaults
    expand_attribute_list(e.al, e.attributes)
  end

=begin maruku_doc
Attribute: unsafe_features
Scope: global
Summary: Enables execution of XML instructions.

Disabled by default because of security concerns.
=end

  if Maruku::Globals[:unsafe_features]
    self.execute_code_blocks
    # TODO: remove executed code blocks
  end
end
|
||||
|
||||
# Expands the attribute list al into the Hash result.
#
# * :class values accumulate, separated by a single space.
# * :id overwrites.
# * :ref is looked up in self.ald and expanded recursively; names already
#   expanded are tracked in result[:expanded_references], and meeting one
#   again is reported as a circular reference. Unknown names are appended
#   to result[:unresolved_references] and also set as a true-valued key.
# * any other key is stored under its symbol.
def expand_attribute_list(al, result)
  al.each do |k, v|
    if k == :class
      if result[:class]
        result[:class] += " " + v
      else
        result[:class] = v
      end
    elsif k == :id
      result[:id] = v
    elsif k == :ref
      if self.ald[v]
        seen = (result[:expanded_references] ||= [])
        if seen.include?(v)
          seen.push v
          maruku_error "Circular reference between labels.\n\n"+
            "Label #{v.inspect} calls itself via recursion.\nThe recursion is "+
            (seen.map{|x| x.inspect}.join(' => '))
        else
          seen.push v
          expand_attribute_list(self.ald[v], result)
        end
      else
        if result[:unresolved_references]
          result[:unresolved_references] << " #{v}"
        else
          result[:unresolved_references] = v
        end

        result[v.to_sym] = true
      end
    else
      result[k.to_sym]=v
    end
  end
end
|
||||
|
||||
# Evaluates code in the context of object, reporting failures instead of
# letting them escape.
#
# object - receiver for instance_eval.
# code   - String of Ruby source.
#
# Returns the value of the evaluated code, or nil when it raised.
# Errors derived from StandardError or ScriptError (which covers
# SyntaxError) are reported through maruku_error together with the
# offending code and the backtrace.
#
# NOTE(review): this runs arbitrary Ruby taken from the document; the
# visible caller only reaches it when the :unsafe_features global is set.
def safe_execute_code(object, code)
  object.instance_eval(code)
rescue StandardError, ScriptError => e
  # Exception#backtrace is the public accessor; `e.caller` is a private
  # Kernel method and raised NoMethodError inside the handler.
  trace = (e.backtrace || []).join("\n")
  maruku_error "Exception while executing this:\n"+
    add_tabs(code, 1, ">")+
    "\nThe error was:\n"+
    add_tabs(e.inspect+"\n"+trace, 1, "|")
  nil
end
|
||||
|
||||
# Runs every XML instruction whose target is 'maruku' through
# safe_execute_code, with the instruction element itself as receiver.
# A String result is printed to standard output.
def execute_code_blocks
  self.each_element(:xml_instr) do |instr|
    next unless instr.target == 'maruku'

    outcome = safe_execute_code(instr, instr.code)
    puts "Result is : #{outcome.inspect}" if outcome.kind_of?(String)
  end
end
|
||||
|
||||
# Replaces the first literal occurrence of each known abbreviation in every
# string with an abbreviation element.
#
# For a string containing the abbreviation, the replacement is the text
# before it, the element from md_abbr, and the text after it; other strings
# are returned unchanged.
def search_abbreviations
  self.abbreviations.each do |abbrev, title|
    pattern = Regexp.new(Regexp.escape(abbrev))
    self.replace_each_string do |s|
      found = pattern.match(s)
      if found
        abbr = md_abbr(abbrev.dup, title ? title.dup : nil)
        [found.pre_match, abbr, found.post_match]
      else
        s
      end
    end
  end
end
|
||||
|
||||
include REXML
|
||||
# (PHP Markdown extra) Processes Markdown found inside raw HTML elements
# that carry a `markdown` attribute (e.g. markdown=1 or markdown=block).
#
# For each such element the attribute is removed and every non-blank text
# node selected by "//text()" is replaced by the HTML of its parsed
# content. Parsing is block-level when the attribute equals 'block' or the
# tag is listed in block_tags, span-level otherwise.
def substitute_markdown_inside_raw_html
  self.each_element(:raw_html) do |raw|
    doc = raw.instance_variable_get :@parsed_html
    if doc # valid html
      # parse block-level markdown elements in these HTML tags
      block_tags = ['div']

      # use xpath to find elements with 'markdown' attribute
      XPath.match(doc, "//*[attribute::markdown]" ).each do |node|
        # should we parse block-level or span-level?
        block_level = (node.attributes['markdown'] == 'block') ||
          block_tags.include?(node.name)
        # remove 'markdown' attribute
        node.delete_attribute 'markdown'
        # Select all text elements
        XPath.match(node, "//text()" ).each do |original_text|
          s = original_text.value.strip
          next unless s.size > 0

          parsed = block_level ? parse_text_as_markdown(s) : parse_lines_as_span([s])
          el = md_el(:dummy, parsed)
          holder = original_text.parent
          el.children_to_html.each do |x|
            holder.insert_before(original_text, x)
          end
          holder.delete(original_text)
        end
      end
    end
  end
end
|
||||
|
||||
end end end end
|
692
lib/maruku/input/parse_span_better.rb
Normal file
692
lib/maruku/input/parse_span_better.rb
Normal file
|
@ -0,0 +1,692 @@
|
|||
#--
|
||||
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
|
||||
#
|
||||
# This file is part of Maruku.
|
||||
#
|
||||
# Maruku is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Maruku is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Maruku; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#++
|
||||
|
||||
|
||||
require 'set'
|
||||
|
||||
module MaRuKu; module In; module Markdown; module SpanLevelParser
|
||||
include MaRuKu::Helpers
|
||||
|
||||
EscapedCharInText =
|
||||
Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>]
|
||||
|
||||
EscapedCharInQuotes =
|
||||
Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>,?',?"]
|
||||
|
||||
EscapedCharInInlineCode = [?\\,?`]
|
||||
|
||||
# Joins lines with "\n" and parses the result as span-level Markdown.
def parse_lines_as_span(lines, parent=nil)
  joined = lines.join("\n")
  parse_span_better(joined, parent)
end
|
||||
|
||||
# Parses string as span-level Markdown.
#
# A frozen copy of the string is wrapped in a CharSource and read with
# read_span until end of input (exit char nil).
def parse_span_better(string, parent=nil)
  error "Passed #{string.class}." unless string.kind_of? String

  frozen_copy = (string + "")
  frozen_copy.freeze
  read_span(CharSource.new(frozen_copy, parent), EscapedCharInText, [nil])
end
|
||||
|
||||
# This is the main loop for reading span elements.
#
# It's long, but not *complex* or difficult to understand.
#
# src             - character source to read from.
# escaped         - collection of characters that a backslash escapes.
# exit_on_chars   - stop (without consuming) when the current character is
#                   one of these; nil in the list means end of input.
# exit_on_strings - optional list of strings; stop when the input
#                   continues with any of them.
#
# Returns the result of `educate` applied to the elements read, after
# attribute lists are merged (merge_ial) and one leading / one trailing
# space is removed.
def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
  con = SpanContext.new
  c = d = nil
  while true
    c = src.cur_char

    # This is only an optimization which cuts 50% of the time used.
    # (but you can't use a-zA-Z in exit_on_chars)
    if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
      con.cur_string << src.shift_char
      next
    end

    break if exit_on_chars && exit_on_chars.include?(c)
    break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}

    # check if there are extensions
    if check_span_extensions(src, con)
      next
    end

    case c = src.cur_char
    when ?\s # it's space (32)
      # Hard line break: two spaces followed by a newline (3 characters,
      # matching the 3 characters skipped below).
      if src.cur_chars_are "  \n"
        src.ignore_chars(3)
        con.push_element md_br()
        next
      else
        src.ignore_char
        con.push_space
      end
    when ?\n, ?\t
      src.ignore_char
      con.push_space
    when ?`
      read_inline_code(src,con)
    when ?<
      # It could be:
      # 1) HTML "<div ..."
      # 2) HTML "<!-- ..."
      # 3) url "<http:// ", "<ftp:// ..."
      # 4) email "<andrea@... ", "<mailto:andrea@..."
      # 5) on itself! "a < b "
      # 6) Start of <<guillemettes>>
      case d = src.next_char
      when ?< # guillemettes
        src.ignore_chars(2)
        con.push_char ?<
        con.push_char ?<
      when ?!
        if src.cur_chars_are '<!--'
          read_inline_html(src, con)
        else
          con.push_char src.shift_char
        end
      when ??
        read_xml_instr_span(src, con)
      when ?\s, ?\t
        con.push_char src.shift_char
      else
        if src.next_matches(/<mailto:/) or
           src.next_matches(/<[\w\.]+\@/)
          read_email_el(src, con)
        elsif src.next_matches(/<\w+:/)
          read_url_el(src, con)
        elsif src.next_matches(/<\w/)
          read_inline_html(src, con)
        else
          con.push_char src.shift_char
        end
      end
    when ?\\
      d = src.next_char
      if d == ?'
        src.ignore_chars(2)
        con.push_element md_entity('apos')
      elsif d == ?"
        src.ignore_chars(2)
        con.push_element md_entity('quot')
      elsif escaped.include? d
        src.ignore_chars(2)
        con.push_char d
      else
        con.push_char src.shift_char
      end
    when ?[
      if markdown_extra? && src.next_char == ?^
        read_footnote_ref(src,con)
      else
        read_link(src, con)
      end
    when ?!
      if src.next_char == ?[
        read_image(src, con)
      else
        con.push_char src.shift_char
      end
    when ?&
      if m = src.read_regexp(/\&([\w\d]+);/)
        con.push_element md_entity(m[1])
      else
        con.push_char src.shift_char
      end
    when ?*
      if not src.next_char
        maruku_error "Opening * as last char.", src, con
        maruku_recover "Threating as literal", src, con
        con.push_char src.shift_char
      else
        follows = src.cur_chars(4)
        if follows =~ /^\*\*\*[^\s\*]/
          con.push_element read_emstrong(src,'***')
        elsif follows =~ /^\*\*[^\s\*]/
          con.push_element read_strong(src,'**')
        elsif follows =~ /^\*[^\s\*]/
          con.push_element read_em(src,'*')
        else # * is just a normal char
          con.push_char src.shift_char
        end
      end
    when ?_
      if not src.next_char
        maruku_error "Opening _ as last char", src, con
        maruku_recover "Threating as literal", src, con
        con.push_char src.shift_char
      else
        follows = src.cur_chars(4)
        if follows =~ /^\_\_\_[^\s\_]/
          con.push_element read_emstrong(src,'___')
        elsif follows =~ /^\_\_[^\s\_]/
          con.push_element read_strong(src,'__')
        elsif follows =~ /^\_[^\s\_]/
          con.push_element read_em(src,'_')
        else # _ is just a normal char
          con.push_char src.shift_char
        end
      end
    when ?{ # extension
      src.ignore_char # {
      interpret_extension(src, con, [?}])
      src.ignore_char # }
    when nil
      # end of input reached before any exit condition
      maruku_error ("Unclosed span (waiting for %s"+
        "#{exit_on_strings.inspect})") % [
          exit_on_chars ? "#{exit_on_chars.inspect} or" : ""],
        src,con
      break
    else # normal text
      con.push_char src.shift_char
    end # end case
  end # end while true
  con.push_string_if_present

  # Assign IAL to elements
  merge_ial(con.elements, src, con)

  # Remove leading space
  if (s = con.elements.first).kind_of? String
    if s[0] == ?\s then con.elements[0] = s[1, s.size-1] end
    con.elements.shift if s.size == 0
  end

  # Remove final spaces
  if (s = con.elements.last).kind_of? String
    s.chop! if s[-1] == ?\s
    con.elements.pop if s.size == 0
  end

  educate(con.elements)
end
|
||||
|
||||
|
||||
# Reads an XML processing instruction `<?target code ?>` and pushes the
# element built by md_xml_instr onto con.
#
# The target is the leading word (empty string when absent); the code is
# everything up to the closing "?>", stripped of surrounding whitespace.
def read_xml_instr_span(src, con)
  src.ignore_chars(2) # starting <?

  # read target <?target code... ?>
  m = src.read_regexp(/(\w+)/)
  target = m ? m[1] : ''

  delim = "?>"
  raw = read_simple(src, [], [], [delim])
  src.ignore_chars delim.size

  con.push_element md_xml_instr(target, (raw || "").strip)
end
|
||||
|
||||
# Interprets the content of a `{...}` extension.
#
# Start: cursor on the character **after** '{'
# End: cursor on '}' or EOF
#
# A leading ':' is skipped, and a leading '#' or '.' is kept; in both cases
# the rest is read as meta-data. Anything else is read up to the break
# characters, reported with maruku_recover, and then treated as meta-data
# as well.
def interpret_extension(src, con, break_on_chars)
  case src.cur_char
  when ?:
    src.ignore_char # :
    extension_meta(src, con, break_on_chars)
  when ?#, ?.
    extension_meta(src, con, break_on_chars)
  else
    stuff = read_simple(src, [?}], break_on_chars, [])
    if stuff =~ /^(\w+\s|[^\w])/
      extension_id = $1.strip
      maruku_recover "I don't know what to do with extension '#{extension_id}'\n"+
        "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
    else
      maruku_recover "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
    end
    extension_meta(src, con, break_on_chars)
  end
end
|
||||
|
||||
# Reads the meta-data inside an extension.
#
# When the input starts with `name:`, the following attribute list is an
# attribute list definition: it is stored in self.doc.ald under the full
# name and an md_ald element is pushed onto con. Otherwise the attribute
# list is an inline attribute list and an md_ial element is pushed.
def extension_meta(src, con, break_on_chars)
  # The group must enclose the repetition: /(\w)+/ would capture only the
  # last character of the name.
  if m = src.read_regexp(/(\w+)\:/)
    name = m[1]
    al = read_attribute_list(src, con, break_on_chars)
    self.doc.ald[name] = al
    con.push md_ald(name, al)
  else
    # Inline lists have no name, so nothing is registered in doc.ald
    # (the original stored them under a nil key).
    al = read_attribute_list(src, con, break_on_chars)
    con.push md_ial(al)
  end
end
|
||||
|
||||
# Reads an autolink `<url>` and pushes the element built by md_url.
def read_url_el(src,con)
  src.ignore_char # leading <
  target = read_simple(src, [], [?>])
  src.ignore_char # closing >

  con.push_element(md_url(target))
end
|
||||
|
||||
# Reads an email autolink `<address>` or `<mailto:address>` and pushes the
# element built by md_email; a leading "mailto:" is removed.
def read_email_el(src,con)
  src.ignore_char # leading <
  raw = read_simple(src, [], [?>])
  src.ignore_char # closing >

  con.push_element(md_email(raw.gsub(/^mailto:/,'')))
end
|
||||
|
||||
# Reads a URL up to one of the characters in break_on.
#
# A URL starting with a quote character is reported as an error. Enclosing
# angle brackets are removed.
#
# Returns the URL string, or nil when it is empty.
def read_url(src, break_on)
  error 'Invalid char for url', src if [?',?"].include?(src.cur_char)

  url = read_simple(src, [], break_on) || ""

  # unwrap <...>
  url = url[1, url.size-2] if url[0] == ?< && url[-1] == ?>

  return nil if url.size == 0

  url
end
|
||||
|
||||
|
||||
# Reads a quoted value when the input starts with ' or ", otherwise a
# simple string delimited by exit_on_chars.
def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
  if [?', ?"].include?(src.cur_char)
    read_quoted(src, con)
  else
    read_simple(src, escaped, exit_on_chars)
  end
end
|
||||
|
||||
# Tries to read a quoted value.
#
# Returns nil, consuming nothing, when the stream does not start with ' or
# ". Otherwise consumes the opening quote, the content up to the matching
# quote character, and the closing quote, and returns the content as read
# by read_simple (nil when empty).
def read_quoted(src, con)
  return nil unless [?', ?"].include?(src.cur_char)

  quote_char = src.shift_char # opening quote
  content = read_simple(src, EscapedCharInQuotes, [quote_char])
  src.ignore_char # closing quote
  content
end
|
||||
|
||||
# Reads a simple string (no formatting) until one of exit_on_chars or one
# of exit_on_strings is met, while un-escaping the characters in `escaped`.
#
# src             - character source.
# escaped         - characters that may follow a backslash to be taken
#                   literally (the backslash is dropped).
# exit_on_chars   - stop characters (may be nil); the stop character is
#                   left in the stream.
# exit_on_strings - optional stop strings; also left in the stream.
#
# Returns the text read, or nil if it is empty. If the input ends before a
# delimiter is found, the problem is reported with maruku_error /
# maruku_recover and whatever was read so far is returned.
def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil)
  text = ""
  while true
    c = src.cur_char
    break if exit_on_chars && exit_on_chars.include?(c)

    break if exit_on_strings &&
      exit_on_strings.any? {|x| src.cur_chars_are x}

    case c
    when nil
      # exit_on_chars may be nil (see the guard above), so don't call
      # map on it blindly while building the diagnostic.
      delimiters = (exit_on_chars || []).compact.map{|x|""<<x}
      s= "String finished while reading (break on "+
        "#{delimiters.inspect})"+
        " already read: #{text.inspect}"
      maruku_error s, src
      maruku_recover "I boldly continue", src
      break
    when ?\\
      d = src.next_char
      if escaped.include? d
        src.ignore_chars(2)
        text << d
      else
        text << src.shift_char
      end
    else
      text << src.shift_char
    end
  end
  text.empty? ? nil : text
end
|
||||
|
||||
# Reads emphasised text delimited by the one-character delim and returns
# the element built by md_em.
def read_em(src, delim)
  src.ignore_char # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_char # closing delimiter
  md_em(inner)
end
|
||||
|
||||
# Reads strong text delimited by the two-character delim and returns the
# element built by md_strong.
def read_strong(src, delim)
  src.ignore_chars(2) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(2) # closing delimiter
  md_strong(inner)
end
|
||||
|
||||
# Reads strong+emphasised text delimited by the three-character delim and
# returns the element built by md_emstrong.
def read_emstrong(src, delim)
  src.ignore_chars(3) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(3) # closing delimiter
  md_emstrong(inner)
end
|
||||
|
||||
SPACE = ?\ # = 32
|
||||
|
||||
# R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
|
||||
R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
|
||||
|
||||
# Reads a bracketed id "[refid]", consuming both brackets.
#
# Whitespace after the opening bracket is skipped; the id is the run of
# characters that are neither ']' nor whitespace (it may be empty).
#
# Returns the id, or nil when R_REF_ID does not match.
def read_ref_id(src, con)
  src.ignore_char # [
  src.consume_whitespace
  found = src.read_regexp(R_REF_ID)
  found ? found[1] : nil
end
|
||||
|
||||
# Reads a footnote reference id with read_ref_id and pushes the element
# built by md_foot_ref onto con.
def read_footnote_ref(src,con)
  con.push_element(md_foot_ref(read_ref_id(src,con)))
end
|
||||
|
||||
# Reads an inline HTML fragment starting at the cursor.
#
# The remaining buffer is fed to an HTMLHelper. If it consumed something,
# an md_html element is pushed onto con and that many characters are
# skipped; otherwise the current character is pushed as plain text.
#
# Any StandardError raised while doing so (including one raised by `error`
# for malformed HTML) is reported with maruku_error / maruku_recover and
# the current character is pushed as plain text. Non-StandardError
# exceptions such as Interrupt or SystemExit are not intercepted.
def read_inline_html(src, con)
  h = HTMLHelper.new
  begin
    # This is our current buffer in the context
    start = src.current_remaining_buffer

    h.eat_this start
    if not h.is_finished?
      error "inline_html: Malformed:\n "+
        "#{start.inspect}\n #{h.inspect}",src,con
    end

    consumed = start.size - h.rest.size
    if consumed > 0
      con.push_element md_html(h.stuff_you_read)
      src.ignore_chars(consumed)
    else
      puts "HTML helper did not work on #{start.inspect}"
      con.push_char src.shift_char
    end
  rescue StandardError => e
    maruku_error "Bad html: \n" +
      add_tabs(e.inspect+e.backtrace.join("\n"),1,'>'),
      src,con
    maruku_recover "I will try to continue after bad HTML.", src, con
    con.push_char src.shift_char
  end
end
|
||||
|
||||
# Reads an inline code span and pushes the element built by md_code.
#
# The span is opened by a run of N backticks and read up to the next run of
# N backticks. When N > 1, one leading and one trailing space are removed,
# so that a backtick can be placed next to the delimiters.
# An empty span yields an empty string.
def read_inline_code(src, con)
  # Count the number of ticks
  num_ticks = 0
  while src.cur_char == ?`
    num_ticks += 1
    src.ignore_char
  end
  # We will read until this string
  end_string = "`"*num_ticks

  # read_simple returns nil for empty content; normalise to "" so the
  # indexing below cannot fail on nil.
  code = read_simple(src, [], [], [end_string]) || ""

  src.ignore_chars num_ticks

  # Ignore at most one space
  if num_ticks > 1 && code[0] == SPACE
    code = code[1, code.size-1]
  end

  # drop last space
  if num_ticks > 1 && code[-1] == SPACE
    code = code[0,code.size-1]
  end

  con.push_element md_code(code)
end
|
||||
|
||||
# Reads a link starting at '[' and pushes the resulting element onto con.
#
# After the bracketed text, three forms are recognised (one optional space
# may precede the second part):
#   [text](url "title") - inline link, built with md_im_link
#   [text][id]          - reference link, built with md_link
#   [text]              - reference link with an empty id
# When the inline form has no closing ')' or the reference id cannot be
# read, the problem is reported and the text's elements are pushed
# unchanged.
def read_link(src, con)
  # we read the string and see what happens
  src.ignore_char # opening bracket
  children = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket

  # ignore space
  if src.cur_char == SPACE && (src.next_char == ?[ || src.next_char == ?( )
    src.shift_char
  end

  opener = src.cur_char
  if opener == ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE,?\t,?)]) || '' # no url is ok
    src.consume_whitespace

    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src,con)

      if !title
        maruku_error 'Must quote title',src,con
      elsif !src.next_matches(/\s*\)/)
        # Tries to read a title with quotes:
        # this is the most ugly thing in Markdown.
        # If there is not a closing par ), then read
        # the rest and guess it's title with quotes
        rest = read_simple(src, [], [?)], [])
        # chop the closing char
        rest.chop!
        title << quote_char << rest
      end
    end

    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      maruku_error 'Unclosed link',src,con
      maruku_recover "No closing ): I will not create"+
        " the link for #{children.inspect}", src, con
      con.push_elements children
      return
    end
    con.push_element md_im_link(children,url, title)
  elsif opener == ?[ # link ref
    ref_id = read_ref_id(src,con)
    unless ref_id
      maruku_error "Could not read ref_id", src, con
      maruku_recover "I will not create the link for "+
        "#{children.inspect}", src, con
      con.push_elements children
      return
    end
    con.push_element md_link(children, ref_id)
  else # empty [link]
    con.push_element md_link(children, "")
  end
end # read link
|
||||
|
||||
# Reads an image starting at "![" and pushes the resulting element onto
# con.
#
# After the bracketed alt text, these forms are recognised (one optional
# space may precede the second part):
#   ![alt](url "title") - inline image, built with md_im_image
#   ![alt][id]          - reference image, built with md_image
# With neither, the elements of the alt text are pushed unchanged.
def read_image(src, con)
  src.ignore_chars(2) # opening "!["
  alt_text = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket
  # ignore space
  if src.cur_char == SPACE and
     (src.next_char == ?[ or src.next_char == ?( )
    src.ignore_char
  end
  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE,?\t,?)])
    if not url
      error "Could not read url from #{src.cur_chars(10).inspect}",
        src,con
    end
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src,con)
      if not title
        maruku_error 'Must quote title',src,con
      else
        # Tries to read a title with quotes:
        # this is the most ugly thing in Markdown
        if not src.next_matches(/\s*\)/)
          # if there is not a closing par ), then read
          # the rest and guess it's title with quotes
          # (read_simple returns nil when nothing is left)
          rest = read_simple(src, [], [?)], []) || ""
          # chop the closing char
          rest.chop!
          title << quote_char << rest
        end
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      # closing is nil at end of input; don't append nil to a String
      shown = closing ? ("" << closing) : ""
      error "Unclosed link: '#{shown}'"+
        " Read url=#{url.inspect} title=#{title.inspect}",src,con
    end
    con.push_element md_im_image(alt_text, url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src,con)
    con.push_element md_image(alt_text, ref_id)
  else # no stuff
    # the elements read for the alt text (there is no `children` here)
    con.push_elements alt_text
  end
end # read image
|
||||
|
||||
|
||||
# Accumulates what has been read while parsing a span: a list of finished
# elements plus the plain-text string currently being built.
class SpanContext
  include MaRuKu::Strings

  # Elements read so far (Strings and MDElements)
  attr_accessor :elements
  # Plain text not yet moved into elements
  attr_accessor :cur_string

  def initialize
    @elements = []
    @cur_string = ""
  end

  # Appends e after flushing the pending text.
  # Raises unless e is a String or an MDElement. Returns nil.
  def push_element(e)
    unless e.kind_of?(String) or e.kind_of?(MDElement)
      raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} "
    end

    push_string_if_present
    @elements << e
    nil
  end
  alias push push_element

  # Appends each item of a: Strings byte by byte into the pending text,
  # anything else through push_element.
  def push_elements(a)
    a.each do |item|
      if item.kind_of? String
        item.each_byte { |b| push_char b }
      else
        push_element item
      end
    end
  end

  # Moves the pending text, if any, into elements. Returns nil.
  def push_string_if_present
    if @cur_string.size > 0
      @elements << @cur_string
      @cur_string = ""
    end
    nil
  end

  # Appends c to the pending text. Returns nil.
  def push_char(c)
    @cur_string << c
    nil
  end

  # push space into current string if
  # there isn't one
  def push_space
    @cur_string << ?\s unless @cur_string[@cur_string.size-1] == ?\s
  end

  # Human-readable dump of the elements and of the pending text.
  def describe
    listing = @elements.map{|x| x.inspect}.join("\n")
    report = "Elements read in span: \n" +
      add_tabs(listing,1, ' -')+"\n"

    report += "Current string: \n #{@cur_string.inspect}\n" if @cur_string.size > 0
    report
  end
end # SpanContext
|
||||
|
||||
end end end end # module MaRuKu; module In; module Markdown; module SpanLevelParser
|
||||
|
225
lib/maruku/input/rubypants.rb
Normal file
225
lib/maruku/input/rubypants.rb
Normal file
|
@ -0,0 +1,225 @@
|
|||
#--
|
||||
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
|
||||
#
|
||||
# This file is part of Maruku.
|
||||
#
|
||||
# Maruku is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Maruku is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Maruku; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#++
|
||||
|
||||
#
|
||||
# NOTA BENE:
|
||||
#
|
||||
# The following algorithm is a rip-off of RubyPants written by
|
||||
# Christian Neukirchen.
|
||||
#
|
||||
# RubyPants is a Ruby port of SmartyPants written by John Gruber.
|
||||
#
|
||||
# This file is distributed under the GPL, which I guess is compatible
|
||||
# with the terms of the RubyPants license.
|
||||
#
|
||||
# -- Andrea Censi
|
||||
|
||||
|
||||
# = RubyPants -- SmartyPants ported to Ruby
|
||||
#
|
||||
# Ported by Christian Neukirchen <mailto:chneukirchen@gmail.com>
|
||||
# Copyright (C) 2004 Christian Neukirchen
|
||||
#
|
||||
# Incorporates ideas, comments and documentation by Chad Miller
|
||||
# Copyright (C) 2004 Chad Miller
|
||||
#
|
||||
# Original SmartyPants by John Gruber
|
||||
# Copyright (C) 2003 John Gruber
|
||||
#
|
||||
|
||||
#
|
||||
# = RubyPants -- SmartyPants ported to Ruby
|
||||
#
|
||||
#
|
||||
# [snip]
|
||||
#
|
||||
# == Authors
|
||||
#
|
||||
# John Gruber did all of the hard work of writing this software in
|
||||
# Perl for Movable Type and almost all of this useful documentation.
|
||||
# Chad Miller ported it to Python to use with Pyblosxom.
|
||||
#
|
||||
# Christian Neukirchen provided the Ruby port, as a general-purpose
|
||||
# library that follows the *Cloth API.
|
||||
#
|
||||
#
|
||||
# == Copyright and License
|
||||
#
|
||||
# === SmartyPants license:
|
||||
#
|
||||
# Copyright (c) 2003 John Gruber
|
||||
# (http://daringfireball.net)
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
#
|
||||
# * Neither the name "SmartyPants" nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this
|
||||
# software without specific prior written permission.
|
||||
#
|
||||
# This software is provided by the copyright holders and contributors
|
||||
# "as is" and any express or implied warranties, including, but not
|
||||
# limited to, the implied warranties of merchantability and fitness
|
||||
# for a particular purpose are disclaimed. In no event shall the
|
||||
# copyright owner or contributors be liable for any direct, indirect,
|
||||
# incidental, special, exemplary, or consequential damages (including,
|
||||
# but not limited to, procurement of substitute goods or services;
|
||||
# loss of use, data, or profits; or business interruption) however
|
||||
# caused and on any theory of liability, whether in contract, strict
|
||||
# liability, or tort (including negligence or otherwise) arising in
|
||||
# any way out of the use of this software, even if advised of the
|
||||
# possibility of such damage.
|
||||
#
|
||||
# === RubyPants license
|
||||
#
|
||||
# RubyPants is a derivative work of SmartyPants and smartypants.py.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
#
|
||||
# This software is provided by the copyright holders and contributors
|
||||
# "as is" and any express or implied warranties, including, but not
|
||||
# limited to, the implied warranties of merchantability and fitness
|
||||
# for a particular purpose are disclaimed. In no event shall the
|
||||
# copyright owner or contributors be liable for any direct, indirect,
|
||||
# incidental, special, exemplary, or consequential damages (including,
|
||||
# but not limited to, procurement of substitute goods or services;
|
||||
# loss of use, data, or profits; or business interruption) however
|
||||
# caused and on any theory of liability, whether in contract, strict
|
||||
# liability, or tort (including negligence or otherwise) arising in
|
||||
# any way out of the use of this software, even if advised of the
|
||||
# possibility of such damage.
|
||||
#
|
||||
#
|
||||
# == Links
|
||||
#
|
||||
# John Gruber:: http://daringfireball.net
|
||||
# SmartyPants:: http://daringfireball.net/projects/smartypants
|
||||
#
|
||||
# Chad Miller:: http://web.chad.org
|
||||
#
|
||||
# Christian Neukirchen:: http://kronavita.de/chris
|
||||
|
||||
|
||||
module MaRuKu; module In; module Markdown; module SpanLevelParser
  # Regexp source fragments (character classes) interpolated into Rules.
  Punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'
  Close_class = %![^\ \t\r\n\\[\{\(\-]!

  # Ordered list of [regexp, substitutions] pairs. Each substitution is
  # either the name of an entity (turned into an element with md_entity)
  # or :one, which stands for the text captured by the first group of
  # the regexp. The order matters: earlier rules consume their matches
  # before later, more general rules are tried.
  Rules = [
    [/---/, :mdash ],
    [/--/, :ndash ],
    ['...', :hellip ],
    ['. . .', :hellip ],
    ["``", :ldquo ],
    ["''", :rdquo ],
    [/<<\s/, [:laquo, :nbsp] ],
    [/\s>>/, [:nbsp, :raquo] ],
    [/<</, :laquo ],
    [/>>/, :raquo ],

    # def educate_single_backticks(str)
    # ["`", :lsquo]
    # ["'", :rsquo]

    # Special case if the very first character is a quote followed by
    # punctuation at a non-word-break. Close the quotes by brute
    # force:
    [/^'(?=#{Punct_class}\B)/, :rsquo],
    [/^"(?=#{Punct_class}\B)/, :rdquo],
    # Special case for double sets of quotes, e.g.:
    # <p>He said, "'Quoted' words in a larger quote."</p>
    [/"'(?=\w)/, [:ldquo, :lsquo] ],
    [/'"(?=\w)/, [:lsquo, :ldquo] ],
    # Special case for decade abbreviations (the '80s):
    [/'(?=\d\ds)/, :rsquo ],
    # Get most opening single quotes:
    [/(\s)'(?=\w)/, [:one, :lsquo] ],
    # Single closing quotes:
    [/(#{Close_class})'/, [:one, :rsquo]],
    [/'(\s|s\b|$)/, [:rsquo, :one]],
    # Any remaining single quotes should be opening ones:
    [/'/, :lsquo],
    # Get most opening double quotes:
    [/(\s)"(?=\w)/, [:one, :ldquo]],
    # Double closing quotes:
    [/(#{Close_class})"/, [:one, :rdquo]],
    [/"(\s|s\b|$)/, [:rdquo, :one]],
    # Any remaining quotes should be opening ones:
    [/"/, :ldquo]
  ].map { |reg, subst|
    # People should do the thinking, machines should do the work:
    # normalize every rule to [Regexp, Array of substitutions].
    reg = Regexp.new(Regexp.escape(reg)) unless reg.kind_of?(Regexp)
    subst = [subst] unless subst.kind_of?(Array)
    [reg, subst]
  }

  # Applies a single rule to a list of elements.
  #
  # Every String in +input+ that matches +reg+ is split into the text
  # before the match, the substitutions listed in +subst+ and the text
  # after the match; the substitutions and the remaining text are put
  # back at the front of +input+, so they are scanned again. Elements
  # that are not Strings are passed through untouched.
  #
  # Note: +input+ is destroyed (it is consumed with Array#shift).
  #
  # Returns the new list of elements.
  def apply_one_rule(reg, subst, input)
    output = []
    while (first = input.shift)
      if first.kind_of?(String) && (m = reg.match(first))
        output.push(m.pre_match) unless m.pre_match.empty?
        input.unshift(m.post_match) unless m.post_match.empty?
        # Unshift in reverse so the substitutions end up in order.
        subst.reverse_each do |x|
          input.unshift(x == :one ? m[1] : md_entity(x.to_s))
        end
      else
        output.push(first)
      end
    end
    output
  end

  # Applies all the Rules, in order, to +elements+ (a list of Strings
  # and other span elements), then drops empty strings and joins
  # consecutive strings.
  #
  # The argument is left untouched: neither the array nor the Strings it
  # contains are modified.
  #
  # Returns the new list of elements.
  def educate(elements)
    # apply_one_rule consumes its input, so work on a copy instead of
    # emptying the caller's array.
    elements = elements.dup
    Rules.each do |reg, subst|
      elements = apply_one_rule(reg, subst, elements)
    end

    final = []
    elements.each do |x|
      is_string = x.kind_of?(String)
      # strips empty strings
      next if is_string && x.empty?
      # join consecutive strings (building a new String, so that
      # Strings owned by the caller are never mutated)
      if is_string && final.last.kind_of?(String)
        final[-1] = final.last + x
      else
        final << x
      end
    end
    final
  end

end end end end
|
141
lib/maruku/input/type_detection.rb
Normal file
141
lib/maruku/input/type_detection.rb
Normal file
|
@ -0,0 +1,141 @@
|
|||
#--
|
||||
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
|
||||
#
|
||||
# This file is part of Maruku.
|
||||
#
|
||||
# Maruku is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Maruku is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Maruku; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#++
|
||||
|
||||
class String
  include MaRuKu::Strings

  # Returns the block-level type of this line, as computed by
  # line_md_type. The result is cached in an instance variable, except
  # for frozen strings, which cannot hold one (assigning it would
  # raise), so their type is recomputed at every call.
  def md_type
    return line_md_type(self) if frozen?
    @md_type ||= line_md_type(self)
  end
end
|
||||
|
||||
class NilClass
  # nil answers md_type with nil, so a missing line can be asked for
  # its type just like a String.
  def md_type
    nil
  end
end
|
||||
|
||||
# This code does the classification of lines for block-level parsing.
|
||||
module MaRuKu; module Strings

  # Classifies the line +l+ for block-level parsing.
  #
  # Returns one of the symbols :text, :code, :empty, :footnote_text,
  # :ref_definition, :abbreviation, :definition, :xml_instr, :raw_html,
  # :ulist, :olist, :header1, :header2, :header3, :hrule, :quote,
  # :metadata, :ald or :ial.
  def line_md_type(l)
    # The order of evaluation is important (:text is a catch-all)
    return :text if l =~ /^[a-zA-Z]/
    return :code if number_of_leading_spaces(l) >= 4
    return :empty if l =~ /^\s*$/
    return :footnote_text if l =~ FootnoteText
    return :ref_definition if l =~ LinkRegex || l =~ IncompleteLink
    return :abbreviation if l =~ Abbreviation
    return :definition if l =~ Definition
    # I had a bug with emails and urls at the beginning of the
    # line that were mistaken for raw_html
    return :text if l =~ EMailAddress || l =~ URL
    # raw html is like PHP Markdown Extra: at most three spaces before
    return :xml_instr if l =~ %r{^\s*<\?}
    return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
    # Anchored at the beginning of the line: a comment opening in the
    # middle of a line must not turn the whole line into raw html.
    return :raw_html if l =~ %r{^[ ]{0,3}<\!\-\-}
    return :ulist if l =~ /^\s?([\*\-\+])\s+.*\w+/
    return :olist if l =~ /^\s?\d+\..*\w+/
    return :header1 if l =~ /^(=)+/
    return :header2 if l =~ /^([-\s])+$/
    return :header3 if l =~ /^(#)+\s*\S+/
    # at least three asterisks on a line, and only whitespace
    return :hrule if l =~ /^(\s*\*\s*){3,1000}$/
    return :hrule if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens
    return :hrule if l =~ /^(\s*_\s*){3,1000}$/ # or underscores
    return :quote if l =~ /^>/
    return :metadata if l =~ /^@/
    # if @@new_meta_data?
    return :ald if l =~ AttributeDefinitionList
    return :ial if l =~ InlineAttributeList
    # end
    # return :equation_end if l =~ EquationEnd
    :text # else, it's just text
  end

  # $1 = id   $2 = attribute list
  AttributeDefinitionList = /^\s{0,3}\{([\w\d\s]+)\}:\s*(.*)\s*$/
  # $1 = attribute list
  InlineAttributeList = /^\s{0,3}\{(.*)\}\s*$/

  # Example:
  #   ^:blah blah
  #   ^: blah blah
  #   ^ : blah blah
  Definition = %r{
    ^ # begin of line
    [ ]{0,3} # up to 3 spaces
    : # colon
    \s* # whitespace
    (\S.*) # the text = $1
    $ # end of line
  }x

  # Example:
  #   *[HTML]: Hyper Text Markup Language
  Abbreviation = %r{
    ^ # begin of line
    \* # one asterisk
    \[ # opening bracket
    ([^\]]+) # any non-closing bracket: id = $1
    \] # closing bracket
    : # colon
    \s* # whitespace
    (\S.*\S)* # definition=$2
    \s* # strip this whitespace
    $ # end of line
  }x

  # $1 = id (including the '^'), $2 = text; the text may be missing.
  FootnoteText = %r{
    ^\s*\[(\^.+)\]: # id = $1 (including '^')
    \s*(\S.*)?$ # text = $2 (not obb.)
  }x

  # This regex is taken from BlueCloth sources
  # Link defs are in the form: ^[id]: \n? url "optional title"
  LinkRegex = %r{
    ^[ ]*\[([^\]]+)\]: # id = $1
    [ ]*
    <?(\S+)>? # url = $2
    [ ]*
    (?:# Titles are delimited by "quotes" or (parens).
    ["(']
    (.+?) # title = $3
    [")'] # Matching ) or "
    \s*(.+)? # stuff = $4
    )? # title is optional
  }x

  # A link definition with an id but nothing after the colon: $1 = id
  IncompleteLink = %r{^\s*\[(.+)\]:\s*$}

  # $1 = header text   $2 = id
  HeaderWithId = /^(.*)\{\#([\w_-]+)\}\s*$/

  # $1 = header text   $2 = attribute list
  HeaderWithAttributes = /^(.*)\{(.*)\}\s*$/

  # if contains a pipe, it could be a table header
  MightBeTableHeader = %r{\|}
  # -------------:
  Sep = /\s*(\:)?\s*-+\s*(\:)?\s*/
  # | -------------:| ------------------------------ |
  TableSeparator = %r{^(\|?#{Sep}\|?)+\s*$}

  # An e-mail address between angle brackets: $1 = address
  EMailAddress = /<([^:]+@[^:]+)>/
  # A line beginning with an http or https URL between angle brackets.
  URL = /^<https?:/
end end
|
Loading…
Add table
Add a link
Reference in a new issue