Bring up to current.

This commit is contained in:
Jacques Distler 2007-01-22 08:36:51 -06:00
parent 69b62b6f33
commit b19e1e4f47
71 changed files with 8305 additions and 39 deletions

View file

@ -0,0 +1,325 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu; module In; module Markdown; module SpanLevelParser
# a string scanner coded by me
class CharSourceManual; end
# a wrapper around StringScanner
class CharSourceStrscan; end
# A debug scanner that checks the correctness of both
# by comparing their output
class CharSourceDebug; end
# Choose!
CharSource = CharSourceManual # faster! 58ms vs. 65ms
#CharSource = CharSourceStrscan
#CharSource = CharSourceDebug
class CharSourceManual
include MaRuKu::Strings
def initialize(s, parent=nil)
raise "Passed #{s.class}" if not s.kind_of? String
@buffer = s
@buffer_index = 0
@parent = parent
end
# Return current char as a FixNum (or nil).
def cur_char; @buffer[@buffer_index] end
# Return the next n chars as a String.
def cur_chars(n); @buffer[@buffer_index,n] end
# Return the char after current char as a FixNum (or nil).
def next_char; @buffer[@buffer_index+1] end
def shift_char
c = @buffer[@buffer_index]
@buffer_index+=1
c
end
def ignore_char
@buffer_index+=1
nil
end
def ignore_chars(n)
@buffer_index+=n
nil
end
def current_remaining_buffer
@buffer[@buffer_index, @buffer.size-@buffer_index]
end
def cur_chars_are(string)
# There is a bug here
if false
r2 = /^.{#{@buffer_index}}#{Regexp.escape string}/m
@buffer =~ r2
else
cur_chars(string.size) == string
end
end
def next_matches(r)
r2 = /^.{#{@buffer_index}}#{r}/m
md = r2.match @buffer
return !!md
end
def read_regexp3(r)
r2 = /^.{#{@buffer_index}}#{r}/m
m = r2.match @buffer
if m
consumed = m.to_s.size - @buffer_index
# puts "Consumed #{consumed} chars (entire is #{m.to_s.inspect})"
ignore_chars consumed
else
# puts "Could not read regexp #{r2.inspect} from buffer "+
# " index=#{@buffer_index}"
# puts "Cur chars = #{cur_chars(20).inspect}"
# puts "Matches? = #{cur_chars(20) =~ r}"
end
m
end
def read_regexp(r)
r2 = /^#{r}/
rest = current_remaining_buffer
m = r2.match(rest)
if m
@buffer_index += m.to_s.size
# puts "#{r} matched #{rest.inspect}: #{m.to_s.inspect}"
end
return m
end
def consume_whitespace
while c = cur_char
if (c == 32 || c == ?\t)
# puts "ignoring #{c}"
ignore_char
else
# puts "#{c} is not ws: "<<c
break
end
end
end
def read_text_chars(out)
s = @buffer.size; c=nil
while @buffer_index < s && (c=@buffer[@buffer_index]) &&
((c>=?a && c<=?z) || (c>=?A && c<=?Z))
out << c
@buffer_index += 1
end
end
def describe
s = describe_pos(@buffer, @buffer_index)
if @parent
s += "\n\n" + @parent.describe
end
s
end
include SpanLevelParser
end
def describe_pos(buffer, buffer_index)
len = 75
num_before = [len/2, buffer_index].min
num_after = [len/2, buffer.size-buffer_index].min
num_before_max = buffer_index
num_after_max = buffer.size-buffer_index
# puts "num #{num_before} #{num_after}"
num_before = [num_before_max, len-num_after].min
num_after = [num_after_max, len-num_before].min
# puts "num #{num_before} #{num_after}"
index_start = [buffer_index - num_before, 0].max
index_end = [buffer_index + num_after, buffer.size].min
size = index_end- index_start
# puts "- #{index_start} #{size}"
str = buffer[index_start, size]
str.gsub!("\n",'N')
str.gsub!("\t",'T')
if index_end == buffer.size
str += "EOF"
end
pre_s = buffer_index-index_start
pre_s = [pre_s, 0].max
pre_s2 = [len-pre_s,0].max
# puts "pre_S = #{pre_s}"
pre =" "*(pre_s)
"-"*len+"\n"+
str + "\n" +
"-"*pre_s + "|" + "-"*(pre_s2)+"\n"+
# pre + "|\n"+
pre + "+--- Byte #{buffer_index}\n"+
"Shown bytes [#{index_start} to #{size}] of #{buffer.size}:\n"+
add_tabs(buffer,1,">")
# "CharSource: At character #{@buffer_index} of block "+
# " beginning with:\n #{@buffer[0,50].inspect} ...\n"+
# " before: \n ... #{cur_chars(50).inspect} ... "
end
require 'strscan'
class CharSourceStrscan
include SpanLevelParser
include MaRuKu::Strings
def initialize(s)
@s = StringScanner.new(s)
end
# Return current char as a FixNum (or nil).
def cur_char
@s.peek(1)[0]
end
# Return the next n chars as a String.
def cur_chars(n);
@s.peek(n)
end
# Return the char after current char as a FixNum (or nil).
def next_char;
@s.peek(2)[1]
end
def shift_char
(@s.get_byte)[0]
end
def ignore_char
@s.get_byte
nil
end
def ignore_chars(n)
n.times do @s.get_byte end
nil
end
def current_remaining_buffer
@s.rest #nil #@buffer[@buffer_index, @buffer.size-@buffer_index]
end
def cur_chars_are(string)
cur_chars(string.size) == string
end
def next_matches(r)
len = @s.match?(r)
return !!len
end
def read_regexp(r)
string = @s.scan(r)
if string
return r.match(string)
else
return nil
end
end
def consume_whitespace
@s.scan /\s+/
nil
end
def describe
describe_pos(@s.string, @s.pos)
end
end
class CharSourceDebug
def initialize(s)
@a = CharSourceManual.new(s)
@b = CharSourceStrscan.new(s)
end
def method_missing(methodname, *args)
a_bef = @a.describe
b_bef = @b.describe
a = @a.send(methodname, *args)
b = @b.send(methodname, *args)
# if methodname == :describe
# return a
# end
if a.kind_of? MatchData
if a.to_a != b.to_a
puts "called: #{methodname}(#{args})"
puts "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}"
puts "AFTER: "+@a.describe
puts "AFTER: "+@b.describe
puts "BEFORE: "+a_bef
puts "BEFORE: "+b_bef
puts caller.join("\n")
exit
end
else
if a!=b
puts "called: #{methodname}(#{args})"
puts "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}"
puts ""+@a.describe
puts ""+@b.describe
puts caller.join("\n")
exit
end
end
if @a.cur_char != @b.cur_char
puts "Fuori sincronia dopo #{methodname}(#{args})"
puts ""+@a.describe
puts ""+@b.describe
exit
end
return a
end
end
end end end end

View file

@ -0,0 +1,68 @@
module MaRuKu; module In; module Markdown
# Hash Fixnum -> name
SpanExtensionsTrigger = {}
class SpanExtension
# trigging chars
attr_accessor :chars
# trigging regexp
attr_accessor :regexp
# lambda
attr_accessor :block
end
# Hash String -> Extension
SpanExtensions = {}
def check_span_extensions(src, con)
c = src.cur_char
if extensions = SpanExtensionsTrigger[c]
extensions.each do |e|
if e.regexp && (match = src.next_matches(e.regexp))
return true if e.block.call(doc, src, con)
end
end
end
return false # not special
end
def self.register_span_extension(args, &block)
e = SpanExtension.new
e.chars = [*args[:chars]]
e.regexp = args[:regexp]
e.block = block
e.chars.each do |c|
(SpanExtensionsTrigger[c] ||= []).push e
end
end
def self.register_block_extension(args, &block)
regexp = args[:regexp]
BlockExtensions[regexp] = block
end
# Hash Regexp -> Block
BlockExtensions = {}
def check_block_extensions(src, con, line)
BlockExtensions.each do |reg, block|
if m = reg.match(line)
block = BlockExtensions[reg]
return true if block.call(doc, src, con)
end
end
return false # not special
end
def any_matching_block_extension?(line)
BlockExtensions.each_key do |reg|
m = reg.match(line)
return m if m
end
return false
end
end end end

View file

@ -0,0 +1,144 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu; module In; module Markdown; module SpanLevelParser
# This class helps me read and sanitize HTML blocks
# I tried to do this with REXML, but wasn't able to. (suggestions?)
class HTMLHelper
include MaRuKu::Strings
Tag = %r{^<(/)?(\w+)\s*([^>]*)>}m
EverythingElse = %r{^[^<]+}m
CommentStart = %r{^<!--}x
CommentEnd = %r{^.*-->}
TO_SANITIZE = ['img','hr']
# attr_accessor :inside_comment
attr_reader :rest
def initialize
@rest = ""
@tag_stack = []
@m = nil
@already = ""
@inside_comment = false
end
def eat_this(line)
@rest = line + @rest
things_read = 0
until @rest.empty?
if @inside_comment
if @m = CommentEnd.match(@rest)
@inside_comment = false
@already += @m.pre_match + @m.to_s
@rest = @m.post_match
elsif @m = EverythingElse.match(@rest)
@already += @m.pre_match + @m.to_s
@rest = @m.post_match
end
else
if @m = CommentStart.match(@rest)
things_read += 1
@inside_comment = true
@already += @m.pre_match + @m.to_s
@rest = @m.post_match
elsif @m = Tag.match(@rest)
things_read += 1
@already += @m.pre_match
@rest = @m.post_match
is_closing = !!@m[1]
tag = @m[2]
attributes = @m[3]
is_single = false
if attributes =~ /\A(.*)\/\Z/
attributes = $1
is_single = true
end
if TO_SANITIZE.include? tag
attributes.strip!
# puts "Attributes: #{attributes.inspect}"
if attributes.size > 0
@already += '<%s %s />' % [tag, attributes]
else
@already += '<%s />' % [tag]
end
elsif is_closing
@already += @m.to_s
if @tag_stack.empty?
error "Malformed: closing tag #{tag.inspect} "+
"in empty list"
end
if @tag_stack.last != tag
error "Malformed: tag <#{tag}> "+
"closes <#{@tag_stack.last}>"
end
@tag_stack.pop
elsif not is_single
@tag_stack.push tag
@already += @m.to_s
end
elsif @m = EverythingElse.match(@rest)
@already += @m.pre_match + @m.to_s
@rest = @m.post_match
else
error "Malformed HTML: not complete: #{@rest.inspect}"
end
end # not inside comment
# puts inspect
# puts "Read: #{@tag_stack.inspect}"
break if is_finished? and things_read>0
end
end
def error(s)
raise Exception, "Error: #{s} \n"+ inspect, caller
end
def inspect; "HTML READER\n comment=#{@inside_comment} "+
"match=#{@m.to_s.inspect}\n"+
"Tag stack = #{@tag_stack.inspect} \n"+
"Before:\n"+
add_tabs(@already,1,'|')+"\n"+
"After:\n"+
add_tabs(@rest,1,'|')+"\n"
end
def stuff_you_read
@already
end
def is_finished?
not @inside_comment and @tag_stack.empty?
end
end # html helper
end end end end

View file

@ -0,0 +1,111 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu; module In; module Markdown; module BlockLevelParser
# This represents a source of lines that can be consumed.
#
# It is the twin of CharSource.
#
class LineSource
include MaRuKu::Strings
def initialize(lines, parent=nil, parent_offset=nil)
raise "NIL lines? " if not lines
@lines = lines
@lines_index = 0
@parent = parent
@parent_offset = parent_offset
end
def cur_line() @lines[@lines_index] end
def next_line() @lines[@lines_index+1] end
def shift_line()
raise "Over the rainbow" if @lines_index >= @lines.size
l = @lines[@lines_index]
@lines_index += 1
return l
end
def ignore_line
raise "Over the rainbow" if @lines_index >= @lines.size
@lines_index += 1
end
def describe
#s = "At line ##{@lines_index} of #{@lines.size}:\n"
s = "At line #{original_line_number(@lines_index)}\n"
context = 3 # lines
from = [@lines_index-context, 0].max
to = [@lines_index+context, @lines.size-1].min
for i in from..to
prefix = (i == @lines_index) ? '--> ' : ' ';
l = @lines[i]
s += "%10s %4s|#{l}" %
[@lines[i].md_type.to_s, prefix]
s += "|\n"
end
# if @parent
# s << "Parent context is: \n"
# s << add_tabs(@parent.describe,1,'|')
# end
s
end
def original_line_number(index)
if @parent
return index + @parent.original_line_number(@parent_offset)
else
1 + index
end
end
def cur_index
@lines_index
end
# Returns the type of next line as a string
# breaks at first :definition
def tell_me_the_future
s = ""; num_e = 0;
for i in @lines_index..@lines.size-1
c = case @lines[i].md_type
when :text; "t"
when :empty; num_e+=1; "e"
when :definition; "d"
else "o"
end
s += c
break if c == "d" or num_e>1
end
s
end
end # linesource
end end end end # block

View file

@ -0,0 +1,594 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu; module In; module Markdown; module BlockLevelParser
include Helpers
include MaRuKu::Strings
include MaRuKu::In::Markdown::SpanLevelParser
class BlockContext < Array
def describe
n = 5
desc = size > n ? self[-n,n] : self
"Last #{n} elements: "+
desc.map{|x| "\n -" + x.inspect}.join
end
end
# Splits the string and calls parse_lines_as_markdown
def parse_text_as_markdown(text)
lines = split_lines(text)
src = LineSource.new(lines)
return parse_blocks(src)
end
# Input is a LineSource
def parse_blocks(src)
output = BlockContext.new
# run state machine
while src.cur_line
next if check_block_extensions(src, output, src.cur_line)
# Prints detected type (useful for debugging)
# puts "#{src.cur_line.md_type}|#{src.cur_line}"
case src.cur_line.md_type
when :empty;
output.push :empty
src.ignore_line
when :ial
m = InlineAttributeList.match src.shift_line
content = m[1] || ""
src2 = CharSource.new(content, src)
interpret_extension(src2, output, [nil])
when :ald
output.push read_ald(src)
when :text
if src.cur_line =~ MightBeTableHeader and
(src.next_line && src.next_line =~ TableSeparator)
output.push read_table(src)
elsif [:header1,:header2].include? src.next_line.md_type
output.push read_header12(src)
elsif eventually_comes_a_def_list(src)
definition = read_definition(src)
if output.last.kind_of?(MDElement) &&
output.last.node_type == :definition_list then
output.last.children << definition
else
output.push md_el(:definition_list, [definition])
end
else # Start of a paragraph
output.push read_paragraph(src)
end
when :header2, :hrule
# hrule
src.shift_line
output.push md_hrule()
when :header3
output.push read_header3(src)
when :ulist, :olist
list_type = src.cur_line.md_type == :ulist ? :ul : :ol
li = read_list_item(src)
# append to current list if we have one
if output.last.kind_of?(MDElement) &&
output.last.node_type == list_type then
output.last.children << li
else
output.push md_el(list_type, [li])
end
when :quote; output.push read_quote(src)
when :code; e = read_code(src); output << e if e
when :raw_html; e = read_raw_html(src); output << e if e
when :footnote_text; output.push read_footnote_text(src)
when :ref_definition; output.push read_ref_definition(src)
when :abbreviation; output.push read_abbreviation(src)
when :xml_instr; read_xml_instruction(src, output)
when :metadata;
maruku_error "Please use the new meta-data syntax: \n"+
" http://maruku.rubyforge.org/proposal.html\n", src
src.ignore_line
else # warn if we forgot something
md_type = src.cur_line.md_type
line = src.cur_line
maruku_error "Ignoring line '#{line}' type = #{md_type}", src
src.shift_line
end
end
merge_ial(output, src, output)
output.delete_if {|x| x.kind_of?(MDElement) &&
x.node_type == :ial}
# get rid of empty line markers
output.delete_if {|x| x == :empty}
# See for each list if we can omit the paragraphs and use li_span
# TODO: do this after
output.each do |c|
# Remove paragraphs that we can get rid of
if [:ul,:ol].include? c.node_type
if c.children.all? {|li| !li.want_my_paragraph} then
c.children.each do |d|
d.node_type = :li_span
d.children = d.children[0].children
end
end
end
if c.node_type == :definition_list
if c.children.all?{|defi| !defi.want_my_paragraph} then
c.children.each do |definition|
definition.definitions.each do |dd|
dd.children = dd.children[0].children
end
end
end
end
end
output
end
def read_ald(src)
if (l=src.shift_line) =~ AttributeDefinitionList
id = $1; al=$2;
al = read_attribute_list(CharSource.new(al,src), context=nil, break_on=[nil])
self.ald[id] = al;
return md_ald(id, al)
else
maruku_error "Bug Bug:\n#{l.inspect}"
return nil
end
end
# reads a header (with ----- or ========)
def read_header12(src)
line = src.shift_line.strip
al = nil
# Check if there is an IAL
if new_meta_data? and line =~ /^(.*)\{(.*)\}\s*$/
line = $1.strip
ial = $2
al = read_attribute_list(CharSource.new(ial,src), context=nil, break_on=[nil])
end
text = parse_lines_as_span [ line ]
level = src.cur_line.md_type == :header2 ? 2 : 1;
src.shift_line
return md_header(level, text, al)
end
# reads a header like '#### header ####'
def read_header3(src)
line = src.shift_line.strip
al = nil
# Check if there is an IAL
if new_meta_data? and line =~ /^(.*)\{(.*)\}\s*$/
line = $1.strip
ial = $2
al = read_attribute_list(CharSource.new(ial,src), context=nil, break_on=[nil])
end
level = num_leading_hashes(line)
text = parse_lines_as_span [strip_hashes(line)]
return md_header(level, text, al)
end
def read_xml_instruction(src, output)
m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
raise "BugBug" if not m
target = m[2] || ''
code = m[3]
until code =~ /\?>/
code += "\n"+src.shift_line
end
if not code =~ (/\?>\s*$/)
garbage = (/\?>(.*)$/.match(code))[1]
maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n"+
add_tabs(code, 1, '|'), src
end
code.gsub!(/\?>\s*$/, '')
if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
result = safe_execute_code(self, code)
if result
if result.kind_of? String
raise "Not expected"
else
output.push *result
end
end
else
output.push md_xml_instr(target, code)
end
end
def read_raw_html(src)
h = HTMLHelper.new
begin
h.eat_this(l=src.shift_line)
# puts "\nBLOCK:\nhtml -> #{l.inspect}"
while src.cur_line and not h.is_finished?
l=src.shift_line
# puts "html -> #{l.inspect}"
h.eat_this "\n"+l
end
rescue Exception => e
ex = e.inspect + e.backtrace.join("\n")
maruku_error "Bad block-level HTML:\n#{add_tabs(ex,1,'|')}\n", src
end
raw_html = h.stuff_you_read
return md_html(raw_html)
end
def read_paragraph(src)
lines = []
while src.cur_line
# :olist does not break
case t = src.cur_line.md_type
when :quote,:header3,:empty,:raw_html,:ref_definition,:ial,:xml_instr
break
when :olist,:ulist
break if src.next_line.md_type == t
end
break if src.cur_line.strip.size == 0
break if [:header1,:header2].include? src.next_line.md_type
break if any_matching_block_extension?(src.cur_line)
lines << src.shift_line
end
# dbg_describe_ary(lines, 'PAR')
children = parse_lines_as_span(lines, src)
return md_par(children)
end
# Reads one list item, either ordered or unordered.
def read_list_item(src)
parent_offset = src.cur_index
item_type = src.cur_line.md_type
first = src.shift_line
# Ugly things going on inside `read_indented_content`
indentation = spaces_before_first_char(first)
break_list = [:ulist, :olist, :ial]
lines, want_my_paragraph =
read_indented_content(src,indentation, break_list, item_type)
# add first line
# Strip first '*', '-', '+' from first line
stripped = first[indentation, first.size-1]
lines.unshift stripped
#dbg_describe_ary(lines, 'LIST ITEM ')
src2 = LineSource.new(lines, src, parent_offset)
children = parse_blocks(src2)
with_par = want_my_paragraph || (children.size>1)
return md_li(children, with_par)
end
def read_abbreviation(src)
if not (l=src.shift_line) =~ Abbreviation
maruku_error "Bug: it's Andrea's fault. Tell him.\n#{l.inspect}"
end
abbr = $1
desc = $2
if (not abbr) or (abbr.size==0)
maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
end
self.abbreviations[abbr] = desc
return md_abbr_def(abbr, desc)
end
def read_footnote_text(src)
parent_offset = src.cur_index
first = src.shift_line
if not first =~ FootnoteText
maruku_error "Bug (it's Andrea's fault)"
end
id = $1
text = $2
# Ugly things going on inside `read_indented_content`
indentation = 4 #first.size-text.size
# puts "id =_#{id}_; text=_#{text}_ indent=#{indentation}"
break_list = [:footnote_text]
item_type = :footnote_text
lines, want_my_paragraph =
read_indented_content(src,indentation, break_list, item_type)
# add first line
if text && text.strip != "" then lines.unshift text end
# dbg_describe_ary(lines, 'FOOTNOTE')
src2 = LineSource.new(lines, src, parent_offset)
children = parse_blocks(src2)
e = md_footnote(id, children)
self.footnotes[id] = e
return e
end
# This is the only ugly function in the code base.
# It is used to read list items, descriptions, footnote text
def read_indented_content(src, indentation, break_list, item_type)
lines =[]
# collect all indented lines
saw_empty = false; saw_anything_after = false
while src.cur_line
#puts "#{src.cur_line.md_type} #{src.cur_line.inspect}"
if src.cur_line.md_type == :empty
saw_empty = true
lines << src.shift_line
next
end
# after a white line
if saw_empty
# we expect things to be properly aligned
if (ns=number_of_leading_spaces(src.cur_line)) < indentation
#puts "breaking for spaces, only #{ns}: #{src.cur_line}"
break
end
saw_anything_after = true
else
break if break_list.include? src.cur_line.md_type
# break if src.cur_line.md_type != :text
end
stripped = strip_indent(src.shift_line, indentation)
lines << stripped
#puts "Accepted as #{stripped.inspect}"
# You are only required to indent the first line of
# a child paragraph.
if stripped.md_type == :text
while src.cur_line && (src.cur_line.md_type == :text)
lines << strip_indent(src.shift_line, indentation)
end
end
end
want_my_paragraph = saw_anything_after ||
(saw_empty && (src.cur_line && (src.cur_line.md_type == item_type)))
# dbg_describe_ary(lines, 'LI')
# create a new context
while lines.last && (lines.last.md_type == :empty)
lines.pop
end
return lines, want_my_paragraph
end
def read_quote(src)
parent_offset = src.cur_index
lines = []
# collect all indented lines
while src.cur_line && src.cur_line.md_type == :quote
lines << unquote(src.shift_line)
end
# dbg_describe_ary(lines, 'QUOTE')
src2 = LineSource.new(lines, src, parent_offset)
children = parse_blocks(src2)
return md_quote(children)
end
def read_code(src)
# collect all indented lines
lines = []
while src.cur_line && ([:code, :empty].include? src.cur_line.md_type)
lines << strip_indent(src.shift_line, 4)
end
#while lines.last && (lines.last.md_type == :empty )
while lines.last && lines.last.strip.size == 0
lines.pop
end
while lines.first && lines.first.strip.size == 0
lines.shift
end
return nil if lines.empty?
source = lines.join("\n")
# dbg_describe_ary(lines, 'CODE')
return md_codeblock(source)
end
# Reads a series of metadata lines with empty lines in between
def read_metadata(src)
hash = {}
while src.cur_line
case src.cur_line.md_type
when :empty; src.shift_line
when :metadata; hash.merge! parse_metadata(src.shift_line)
else break
end
end
hash
end
def read_ref_definition(src)
line = src.shift_line
# if link is incomplete, shift next line
if src.cur_line && (src.cur_line.md_type != :ref_definition) &&
([1,2,3].include? number_of_leading_spaces(src.cur_line) )
line += " "+ src.shift_line
end
# puts "total= #{line}"
match = LinkRegex.match(line)
if not match
error "Link does not respect format: '#{line}'"
end
id = match[1]; url = match[2]; title = match[3];
id = id.strip.downcase
hash = self.refs[id] = {:url=>url,:title=>title}
stuff=match[4]
if stuff
stuff.split.each do |couple|
# puts "found #{couple}"
k, v = couple.split('=')
v ||= ""
if v[0,1]=='"' then v = v[1, v.size-2] end
# puts "key:_#{k}_ value=_#{v}_"
hash[k.to_sym] = v
end
end
# puts hash.inspect
return md_ref_def(id, url, meta={:title=>title})
end
def read_table(src)
def split_cells(s)
s.strip.split('|').select{|x|x.strip.size>0}.map{|x|x.strip}
end
head = split_cells(src.shift_line).map{|s| md_el(:head_cell, parse_lines_as_span([s])) }
separator=split_cells(src.shift_line)
align = separator.map { |s| s =~ Sep
if $1 and $2 then :center elsif $2 then :right else :left end }
num_columns = align.size
if head.size != num_columns
maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
tell_user "I will ignore this table."
# XXX try to recover
return md_br()
end
rows = []
while src.cur_line && src.cur_line =~ /\|/
row = split_cells(src.shift_line).map{|s|
md_el(:cell, parse_lines_as_span([s]))}
if head.size != num_columns
maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}"
tell_user "I will ignore this table."
# XXX try to recover
return md_br()
end
rows << row
end
children = (head+rows).flatten
return md_el(:table, children, {:align => align})
end
# If current line is text, a definition list is coming
# if 1) text,empty,[text,empty]*,definition
def eventually_comes_a_def_list(src)
future = src.tell_me_the_future
ok = future =~ %r{^t+e?d}x
# puts "future: #{future} - #{ok}"
ok
end
def read_definition(src)
# Read one or more terms
terms = []
while src.cur_line && src.cur_line.md_type == :text
terms << md_el(:definition_term, parse_lines_as_span([src.shift_line]))
end
# dbg_describe_ary(terms, 'DT')
want_my_paragraph = false
raise "Chunky Bacon!" if not src.cur_line
# one optional empty
if src.cur_line.md_type == :empty
want_my_paragraph = true
src.shift_line
end
raise "Chunky Bacon!" if src.cur_line.md_type != :definition
# Read one or more definitions
definitions = []
while src.cur_line && src.cur_line.md_type == :definition
parent_offset = src.cur_index
first = src.shift_line
first =~ Definition
first = $1
# I know, it's ugly!!!
lines, w_m_p =
read_indented_content(src,4, [:definition], :definition)
want_my_paragraph ||= w_m_p
lines.unshift first
# dbg_describe_ary(lines, 'DD')
src2 = LineSource.new(lines, src, parent_offset)
children = parse_blocks(src2)
definitions << md_el(:definition_data, children)
end
return md_el(:definition, terms+definitions, {
:terms => terms,
:definitions => definitions,
:want_my_paragraph => want_my_paragraph})
end
end # BlockLevelParser
end # MaRuKu
end
end

View file

@ -0,0 +1,225 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'iconv'
module MaRuKu; module In; module Markdown; module BlockLevelParser
def parse_doc(s)
meta2 = parse_email_headers(s)
data = meta2[:data]
meta2.delete :data
self.attributes.merge! meta2
=begin maruku_doc
Attribute: encoding
Scope: document
Summary: Encoding for the document.
If the `encoding` attribute is specified, then the content
will be converted from the specified encoding to UTF-8.
Conversion happens using the `iconv` library.
=end
enc = self.attributes[:encoding]
self.attributes.delete :encoding
if enc && enc.downcase != 'utf-8'
converted = Iconv.new('utf-8', enc).iconv(data)
# puts "Data: #{data.inspect}: #{data}"
# puts "Conv: #{converted.inspect}: #{converted}"
data = converted
end
@children = parse_text_as_markdown(data)
if true #markdown_extra?
self.search_abbreviations
self.substitute_markdown_inside_raw_html
end
toc = create_toc
# use title if not set
if not self.attributes[:title] and toc.header_element
title = toc.header_element.to_s
self.attributes[:title] = title
# puts "Set document title to #{title}"
end
# save for later use
self.toc = toc
# Now do the attributes magic
each_element do |e|
# default attribute list
if default = self.ald[e.node_type.to_s]
expand_attribute_list(default, e.attributes)
end
expand_attribute_list(e.al, e.attributes)
# puts "#{e.node_type}: #{e.attributes.inspect}"
end
=begin maruku_doc
Attribute: unsafe_features
Scope: global
Summary: Enables execution of XML instructions.
Disabled by default because of security concerns.
=end
if Maruku::Globals[:unsafe_features]
self.execute_code_blocks
# TODO: remove executed code blocks
end
end
# Expands an attribute list in an Hash
def expand_attribute_list(al, result)
al.each do |k, v|
case k
when :class
if not result[:class]
result[:class] = v
else
result[:class] += " " + v
end
when :id; result[:id] = v
when :ref;
if self.ald[v]
already = (result[:expanded_references] ||= [])
if not already.include?(v)
already.push v
expand_attribute_list(self.ald[v], result)
else
already.push v
maruku_error "Circular reference between labels.\n\n"+
"Label #{v.inspect} calls itself via recursion.\nThe recursion is "+
(already.map{|x| x.inspect}.join(' => '))
end
else
if not result[:unresolved_references]
result[:unresolved_references] = v
else
result[:unresolved_references] << " #{v}"
end
result[v.to_sym] = true
end
else
result[k.to_sym]=v
end
end
end
def safe_execute_code(object, code)
begin
return object.instance_eval(code)
rescue Exception => e
maruku_error "Exception while executing this:\n"+
add_tabs(code, 1, ">")+
"\nThe error was:\n"+
add_tabs(e.inspect+"\n"+e.caller.join("\n"), 1, "|")
rescue RuntimeError => e
maruku_error "2: Exception while executing this:\n"+
add_tabs(code, 1, ">")+
"\nThe error was:\n"+
add_tabs(e.inspect, 1, "|")
rescue SyntaxError => e
maruku_error "2: Exception while executing this:\n"+
add_tabs(code, 1, ">")+
"\nThe error was:\n"+
add_tabs(e.inspect, 1, "|")
end
nil
end
def execute_code_blocks
self.each_element(:xml_instr) do |e|
if e.target == 'maruku'
result = safe_execute_code(e, e.code)
if result.kind_of?(String)
puts "Result is : #{result.inspect}"
end
end
end
end
def search_abbreviations
self.abbreviations.each do |abbrev, title|
reg = Regexp.new(Regexp.escape(abbrev))
self.replace_each_string do |s|
if m = reg.match(s)
e = md_abbr(abbrev.dup, title ? title.dup : nil)
[m.pre_match, e, m.post_match]
else
s
end
end
end
end
include REXML
# (PHP Markdown extra) Search for elements that have
# markdown=1 or markdown=block defined
def substitute_markdown_inside_raw_html
self.each_element(:raw_html) do |e|
doc = e.instance_variable_get :@parsed_html
if doc # valid html
# parse block-level markdown elements in these HTML tags
block_tags = ['div']
# use xpath to find elements with 'markdown' attribute
XPath.match(doc, "//*[attribute::markdown]" ).each do |e|
# puts "Found #{e}"
# should we parse block-level or span-level?
parse_blocks = (e.attributes['markdown'] == 'block') ||
block_tags.include?(e.name)
# remove 'markdown' attribute
e.delete_attribute 'markdown'
# Select all text elements of e
XPath.match(e, "//text()" ).each { |original_text|
s = original_text.value.strip
if s.size > 0
el = md_el(:dummy,
parse_blocks ? parse_text_as_markdown(s) :
parse_lines_as_span([s]) )
p = original_text.parent
el.children_to_html.each do |x|
p.insert_before(original_text, x)
end
p.delete(original_text)
end
}
end
end
end
end
end end end end

View file

@ -0,0 +1,692 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'set'
module MaRuKu; module In; module Markdown; module SpanLevelParser
include MaRuKu::Helpers
EscapedCharInText =
Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>]
EscapedCharInQuotes =
Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>,?',?"]
EscapedCharInInlineCode = [?\\,?`]
def parse_lines_as_span(lines, parent=nil)
parse_span_better lines.join("\n"), parent
end
def parse_span_better(string, parent=nil)
if not string.kind_of? String then
error "Passed #{string.class}." end
st = (string + "")
st.freeze
src = CharSource.new(st, parent)
read_span(src, EscapedCharInText, [nil])
end
# This is the main loop for reading span elements
#
# It's long, but not *complex* or difficult to understand.
#
#
def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
con = SpanContext.new
c = d = nil
while true
c = src.cur_char
# This is only an optimization which cuts 50% of the time used.
# (but you can't use a-zA-z in exit_on_chars)
if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
con.cur_string << src.shift_char
next
end
break if exit_on_chars && exit_on_chars.include?(c)
break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}
# check if there are extensions
if check_span_extensions(src, con)
next
end
case c = src.cur_char
when ?\ # it's space (32)
if src.cur_chars_are " \n"
src.ignore_chars(3)
con.push_element md_br()
next
else
src.ignore_char
con.push_space
end
when ?\n, ?\t
src.ignore_char
con.push_space
when ?`
read_inline_code(src,con)
when ?<
# It could be:
# 1) HTML "<div ..."
# 2) HTML "<!-- ..."
# 3) url "<http:// ", "<ftp:// ..."
# 4) email "<andrea@... ", "<mailto:andrea@..."
# 5) on itself! "a < b "
# 6) Start of <<guillemettes>>
case d = src.next_char
when ?<; # guillemettes
src.ignore_chars(2)
con.push_char ?<
con.push_char ?<
when ?!;
if src.cur_chars_are '<!--'
read_inline_html(src, con)
else
con.push_char src.shift_char
end
when ??
read_xml_instr_span(src, con)
when ?\ , ?\t
con.push_char src.shift_char
else
if src.next_matches(/<mailto:/) or
src.next_matches(/<[\w\.]+\@/)
read_email_el(src, con)
elsif src.next_matches(/<\w+:/)
read_url_el(src, con)
elsif src.next_matches(/<\w/)
#puts "This is HTML: #{src.cur_chars(20)}"
read_inline_html(src, con)
else
#puts "This is NOT HTML: #{src.cur_chars(20)}"
con.push_char src.shift_char
end
end
when ?\\
d = src.next_char
if d == ?'
src.ignore_chars(2)
con.push_element md_entity('apos')
elsif d == ?"
src.ignore_chars(2)
con.push_element md_entity('quot')
elsif escaped.include? d
src.ignore_chars(2)
con.push_char d
else
con.push_char src.shift_char
end
when ?[
if markdown_extra? && src.next_char == ?^
read_footnote_ref(src,con)
else
read_link(src, con)
end
when ?!
if src.next_char == ?[
read_image(src, con)
else
con.push_char src.shift_char
end
when ?&
if m = src.read_regexp(/\&([\w\d]+);/)
con.push_element md_entity(m[1])
else
con.push_char src.shift_char
end
when ?*
if not src.next_char
maruku_error "Opening * as last char.", src, con
maruku_recover "Threating as literal"
con.push_char src.shift_char
else
follows = src.cur_chars(4)
if follows =~ /^\*\*\*[^\s\*]/
con.push_element read_emstrong(src,'***')
elsif follows =~ /^\*\*[^\s\*]/
con.push_element read_strong(src,'**')
elsif follows =~ /^\*[^\s\*]/
con.push_element read_em(src,'*')
else # * is just a normal char
con.push_char src.shift_char
end
end
when ?_
if not src.next_char
maruku_error "Opening _ as last char", src, con
maruku_recover "Threating as literal", src, con
con.push_char src.shift_char
else
follows = src.cur_chars(4)
if follows =~ /^\_\_\_[^\s\_]/
con.push_element read_emstrong(src,'___')
elsif follows =~ /^\_\_[^\s\_]/
con.push_element read_strong(src,'__')
elsif follows =~ /^\_[^\s\_]/
con.push_element read_em(src,'_')
else # _ is just a normal char
con.push_char src.shift_char
end
end
when ?{ # extension
src.ignore_char # {
interpret_extension(src, con, [?}])
src.ignore_char # }
when nil
maruku_error ("Unclosed span (waiting for %s"+
"#{exit_on_strings.inspect})") % [
exit_on_chars ? "#{exit_on_chars.inspect} or" : ""],
src,con
break
else # normal text
con.push_char src.shift_char
end # end case
end # end while true
con.push_string_if_present
# Assign IAL to elements
merge_ial(con.elements, src, con)
# Remove leading space
if (s = con.elements.first).kind_of? String
if s[0] == ?\ then con.elements[0] = s[1, s.size-1] end
con.elements.shift if s.size == 0
end
# Remove final spaces
if (s = con.elements.last).kind_of? String
s.chop! if s[-1] == ?\
con.elements.pop if s.size == 0
end
educated = educate(con.elements)
educated
end
def read_xml_instr_span(src, con)
src.ignore_chars(2) # starting <?
# read target <?target code... ?>
target = if m = src.read_regexp(/(\w+)/)
m[1]
else
''
end
delim = "?>"
code =
read_simple(src, escaped=[], break_on_chars=[],
break_on_strings=[delim])
src.ignore_chars delim.size
code = (code || "").strip
con.push_element md_xml_instr(target, code)
end
# Start: cursor on character **after** '{'
# End: curson on '}' or EOF
def interpret_extension(src, con, break_on_chars)
case src.cur_char
when ?:
src.ignore_char # :
extension_meta(src, con, break_on_chars)
when ?#, ?.
extension_meta(src, con, break_on_chars)
else
stuff = read_simple(src, escaped=[?}], break_on_chars, [])
if stuff =~ /^(\w+\s|[^\w])/
extension_id = $1.strip
if false
else
maruku_recover "I don't know what to do with extension '#{extension_id}'\n"+
"I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
extension_meta(src, con, break_on_chars)
end
else
maruku_recover "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
extension_meta(src, con, break_on_chars)
end
end
end
def extension_meta(src, con, break_on_chars)
if m = src.read_regexp(/(\w)+\:/)
name = m[1]
content = m[2]
al = read_attribute_list(src, con, break_on_chars)
self.doc.ald[name] = al
con.push md_ald(name, al)
else
al = read_attribute_list(src, con, break_on_chars)
self.doc.ald[name] = al
con.push md_ial(al)
end
end
def read_url_el(src,con)
src.ignore_char # leading <
url = read_simple(src, [], [?>])
src.ignore_char # closing >
con.push_element md_url(url)
end
def read_email_el(src,con)
src.ignore_char # leading <
mail = read_simple(src, [], [?>])
src.ignore_char # closing >
address = mail.gsub(/^mailto:/,'')
con.push_element md_email(address)
end
def read_url(src, break_on)
if [?',?"].include? src.cur_char
error 'Invalid char for url', src
end
url = read_simple(src, [], break_on)
if not url # empty url
url = ""
end
if url[0] == ?< && url[-1] == ?>
url = url[1, url.size-2]
end
if url.size == 0
return nil
end
url
end
def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
case src.cur_char
when ?', ?"
read_quoted(src, con)
else
read_simple(src, escaped, exit_on_chars)
end
end
# Tries to read a quoted value. If stream does not
# start with ' or ", returns nil.
def read_quoted(src, con)
case src.cur_char
when ?', ?"
quote_char = src.shift_char # opening quote
string = read_simple(src, EscapedCharInQuotes, [quote_char])
src.ignore_char # closing quote
return string
else
# puts "Asked to read quote from: #{src.cur_chars(10).inspect}"
return nil
end
end
# Reads a simple string (no formatting) until one of break_on_chars,
# while escaping the escaped.
# If the string is empty, it returns nil.
# Raises on error if the string terminates unexpectedly.
# # If eat_delim is true, and if the delim is not the EOF, then the delim
# # gets eaten from the stream.
def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil)
text = ""
while true
# puts "Reading simple #{text.inspect}"
c = src.cur_char
if exit_on_chars && exit_on_chars.include?(c)
# src.ignore_char if eat_delim
break
end
break if exit_on_strings &&
exit_on_strings.any? {|x| src.cur_chars_are x}
case c
when nil
s= "String finished while reading (break on "+
"#{exit_on_chars.map{|x|""<<x}.inspect})"+
" already read: #{text.inspect}"
maruku_error s, src
maruku_recover "I boldly continue", src
break
when ?\\
d = src.next_char
if escaped.include? d
src.ignore_chars(2)
text << d
else
text << src.shift_char
end
else
text << src.shift_char
end
end
# puts "Read simple #{text.inspect}"
text.empty? ? nil : text
end
def read_em(src, delim)
src.ignore_char
children = read_span(src, EscapedCharInText, nil, [delim])
src.ignore_char
md_em(children)
end
def read_strong(src, delim)
src.ignore_chars(2)
children = read_span(src, EscapedCharInText, nil, [delim])
src.ignore_chars(2)
md_strong(children)
end
def read_emstrong(src, delim)
src.ignore_chars(3)
children = read_span(src, EscapedCharInText, nil, [delim])
src.ignore_chars(3)
md_emstrong(children)
end
SPACE = ?\ # = 32
# R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
# Reads a bracketed id "[refid]". Consumes also both brackets.
def read_ref_id(src, con)
src.ignore_char # [
src.consume_whitespace
# puts "Next: #{src.cur_chars(10).inspect}"
if m = src.read_regexp(R_REF_ID)
# puts "Got: #{m[1].inspect} Ignored: #{m[2].inspect}"
# puts "Then: #{src.cur_chars(10).inspect}"
m[1]
else
nil
end
end
def read_footnote_ref(src,con)
ref = read_ref_id(src,con)
con.push_element md_foot_ref(ref)
end
def read_inline_html(src, con)
h = HTMLHelper.new
begin
# This is our current buffer in the context
start = src.current_remaining_buffer
h.eat_this start
if not h.is_finished?
error "inline_html: Malformed:\n "+
"#{start.inspect}\n #{h.inspect}",src,con
end
consumed = start.size - h.rest.size
if consumed > 0
con.push_element md_html(h.stuff_you_read)
src.ignore_chars(consumed)
else
puts "HTML helper did not work on #{start.inspect}"
con.push_char src.shift_char
end
rescue Exception => e
maruku_error "Bad html: \n" +
add_tabs(e.inspect+e.backtrace.join("\n"),1,'>'),
src,con
maruku_recover "I will try to continue after bad HTML.", src, con
con.push_char src.shift_char
end
end
def read_inline_code(src, con)
# Count the number of ticks
num_ticks = 0
while src.cur_char == ?`
num_ticks += 1
src.ignore_char
end
# We will read until this string
end_string = "`"*num_ticks
code =
read_simple(src, escaped=[], break_on_chars=[],
break_on_strings=[end_string])
# puts "Now I expects #{num_ticks} ticks: #{src.cur_chars(10).inspect}"
src.ignore_chars num_ticks
# Ignore at most one space
if num_ticks > 1 && code[0] == SPACE
code = code[1, code.size-1]
end
# drop last space
if num_ticks > 1 && code[-1] == SPACE
code = code[0,code.size-1]
end
# puts "Read `` code: #{code.inspect}; after: #{src.cur_chars(10).inspect} "
con.push_element md_code(code)
end
def read_link(src, con)
# we read the string and see what happens
src.ignore_char # opening bracket
children = read_span(src, EscapedCharInText, [?]])
src.ignore_char # closing bracket
# ignore space
if src.cur_char == SPACE and
(src.next_char == ?[ or src.next_char == ?( )
src.shift_char
end
case src.cur_char
when ?(
src.ignore_char # opening (
src.consume_whitespace
url = read_url(src, [SPACE,?\t,?)])
if not url
url = '' # no url is ok
end
src.consume_whitespace
title = nil
if src.cur_char != ?) # we have a title
quote_char = src.cur_char
title = read_quoted(src,con)
if not title
maruku_error 'Must quote title',src,con
else
# Tries to read a title with quotes: ![a](url "ti"tle")
# this is the most ugly thing in Markdown
if not src.next_matches(/\s*\)/)
# if there is not a closing par ), then read
# the rest and guess it's title with quotes
rest = read_simple(src, escaped=[], break_on_chars=[?)],
break_on_strings=[])
# chop the closing char
rest.chop!
title << quote_char << rest
end
end
end
src.consume_whitespace
closing = src.shift_char # closing )
if closing != ?)
maruku_error 'Unclosed link',src,con
maruku_recover "No closing ): I will not create"+
" the link for #{children.inspect}", src, con
con.push_elements children
return
end
con.push_element md_im_link(children,url, title)
when ?[ # link ref
ref_id = read_ref_id(src,con)
if ref_id
con.push_element md_link(children, ref_id)
else
maruku_error "Could not read ref_id", src, con
maruku_recover "I will not create the link for "+
"#{children.inspect}", src, con
con.push_elements children
return
end
else # empty [link]
con.push_element md_link(children, "")
end
end # read link
def read_image(src, con)
src.ignore_chars(2) # opening "!["
alt_text = read_span(src, EscapedCharInText, [?]])
src.ignore_char # closing bracket
# ignore space
if src.cur_char == SPACE and
(src.next_char == ?[ or src.next_char == ?( )
src.ignore_char
end
case src.cur_char
when ?(
src.ignore_char # opening (
src.consume_whitespace
url = read_url(src, [SPACE,?\t,?)])
if not url
error "Could not read url from #{src.cur_chars(10).inspect}",
src,con
end
src.consume_whitespace
title = nil
if src.cur_char != ?) # we have a title
quote_char = src.cur_char
title = read_quoted(src,con)
if not title
maruku_error 'Must quote title',src,con
else
# Tries to read a title with quotes: ![a](url "ti"tle")
# this is the most ugly thing in Markdown
if not src.next_matches(/\s*\)/)
# if there is not a closing par ), then read
# the rest and guess it's title with quotes
rest = read_simple(src, escaped=[], break_on_chars=[?)],
break_on_strings=[])
# chop the closing char
rest.chop!
title << quote_char << rest
end
end
end
src.consume_whitespace
closing = src.shift_char # closing )
if closing != ?)
error ("Unclosed link: '"<<closing<<"'")+
" Read url=#{url.inspect} title=#{title.inspect}",src,con
end
con.push_element md_im_image(alt_text, url, title)
when ?[ # link ref
ref_id = read_ref_id(src,con)
con.push_element md_image(alt_text, ref_id)
else # no stuff
con.push_elements children
end
end # read link
class SpanContext
include MaRuKu::Strings
# Read elements
attr_accessor :elements
attr_accessor :cur_string
def initialize
@elements = []
@cur_string = ""
end
def push_element(e)
raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} " if
not (e.kind_of?(String) or e.kind_of?(MDElement))
push_string_if_present
@elements << e
nil
end
alias push push_element
def push_elements(a)
for e in a
if e.kind_of? String
e.each_byte do |b| push_char b end
else
push_element e
end
end
end
def push_string_if_present
if @cur_string.size > 0
@elements << @cur_string
@cur_string = ""
end
nil
end
def push_char(c)
@cur_string << c
nil
end
# push space into current string if
# there isn't one
def push_space
last = @cur_string[@cur_string.size-1]
@cur_string << ?\ if last != ?\
end
def describe
lines = @elements.map{|x| x.inspect}.join("\n")
s = "Elements read in span: \n" +
add_tabs(lines,1, ' -')+"\n"
if @cur_string.size > 0
s += "Current string: \n #{@cur_string.inspect}\n"
end
s
end
end # SpanContext
end end end end # module MaRuKu; module In; module Markdown; module SpanLevelParser

View file

@ -0,0 +1,225 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
#
# NOTA BENE:
#
# The following algorithm is a rip-off of RubyPants written by
# Christian Neukirchen.
#
# RubyPants is a Ruby port of SmartyPants written by John Gruber.
#
# This file is distributed under the GPL, which I guess is compatible
# with the terms of the RubyPants license.
#
# -- Andrea Censi
# = RubyPants -- SmartyPants ported to Ruby
#
# Ported by Christian Neukirchen <mailto:chneukirchen@gmail.com>
# Copyright (C) 2004 Christian Neukirchen
#
# Incooporates ideas, comments and documentation by Chad Miller
# Copyright (C) 2004 Chad Miller
#
# Original SmartyPants by John Gruber
# Copyright (C) 2003 John Gruber
#
#
# = RubyPants -- SmartyPants ported to Ruby
#
#
# [snip]
#
# == Authors
#
# John Gruber did all of the hard work of writing this software in
# Perl for Movable Type and almost all of this useful documentation.
# Chad Miller ported it to Python to use with Pyblosxom.
#
# Christian Neukirchen provided the Ruby port, as a general-purpose
# library that follows the *Cloth API.
#
#
# == Copyright and License
#
# === SmartyPants license:
#
# Copyright (c) 2003 John Gruber
# (http://daringfireball.net)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# * Neither the name "SmartyPants" nor the names of its contributors
# may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# This software is provided by the copyright holders and contributors
# "as is" and any express or implied warranties, including, but not
# limited to, the implied warranties of merchantability and fitness
# for a particular purpose are disclaimed. In no event shall the
# copyright owner or contributors be liable for any direct, indirect,
# incidental, special, exemplary, or consequential damages (including,
# but not limited to, procurement of substitute goods or services;
# loss of use, data, or profits; or business interruption) however
# caused and on any theory of liability, whether in contract, strict
# liability, or tort (including negligence or otherwise) arising in
# any way out of the use of this software, even if advised of the
# possibility of such damage.
#
# === RubyPants license
#
# RubyPants is a derivative work of SmartyPants and smartypants.py.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# This software is provided by the copyright holders and contributors
# "as is" and any express or implied warranties, including, but not
# limited to, the implied warranties of merchantability and fitness
# for a particular purpose are disclaimed. In no event shall the
# copyright owner or contributors be liable for any direct, indirect,
# incidental, special, exemplary, or consequential damages (including,
# but not limited to, procurement of substitute goods or services;
# loss of use, data, or profits; or business interruption) however
# caused and on any theory of liability, whether in contract, strict
# liability, or tort (including negligence or otherwise) arising in
# any way out of the use of this software, even if advised of the
# possibility of such damage.
#
#
# == Links
#
# John Gruber:: http://daringfireball.net
# SmartyPants:: http://daringfireball.net/projects/smartypants
#
# Chad Miller:: http://web.chad.org
#
# Christian Neukirchen:: http://kronavita.de/chris
module MaRuKu; module In; module Markdown; module SpanLevelParser
Punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'
Close_class = %![^\ \t\r\n\\[\{\(\-]!
Rules = [
[/---/, :mdash ],
[/--/, :ndash ],
['...', :hellip ],
['. . .', :hellip ],
["``", :ldquo ],
["''", :rdquo ],
[/<<\s/, [:laquo, :nbsp] ],
[/\s>>/, [:nbsp, :raquo] ],
[/<</, :laquo ],
[/>>/, :raquo ],
# def educate_single_backticks(str)
# ["`", :lsquo]
# ["'", :rsquo]
# Special case if the very first character is a quote followed by
# punctuation at a non-word-break. Close the quotes by brute
# force:
[/^'(?=#{Punct_class}\B)/, :rsquo],
[/^"(?=#{Punct_class}\B)/, :rdquo],
# Special case for double sets of quotes, e.g.:
# <p>He said, "'Quoted' words in a larger quote."</p>
[/"'(?=\w)/, [:ldquo, :lsquo] ],
[/'"(?=\w)/, [:lsquo, :ldquo] ],
# Special case for decade abbreviations (the '80s):
[/'(?=\d\ds)/, :rsquo ],
# Get most opening single quotes:
[/(\s)'(?=\w)/, [:one, :lsquo] ],
# Single closing quotes:
[/(#{Close_class})'/, [:one, :rsquo]],
[/'(\s|s\b|$)/, [:rsquo, :one]],
# Any remaining single quotes should be opening ones:
[/'/, :lsquo],
# Get most opening double quotes:
[/(\s)"(?=\w)/, [:one, :ldquo]],
# Double closing quotes:
[/(#{Close_class})"/, [:one, :rdquo]],
[/"(\s|s\b|$)/, [:rdquo, :one]],
# Any remaining quotes should be opening ones:
[/"/, :ldquo]
].
map{|reg, subst| # People should do the thinking, machines should do the work.
reg = Regexp.new(Regexp.escape(reg)) if not reg.kind_of? Regexp
subst = [subst] if not subst.kind_of?Array
[reg, subst]}
# note: input will be destroyed
def apply_one_rule(reg, subst, input)
output = []
while first = input.shift
if first.kind_of?(String) && (m = reg.match(first))
output.push m. pre_match if m. pre_match.size > 0
input.unshift m.post_match if m.post_match.size > 0
subst.reverse.each do |x|
input.unshift( x == :one ? m[1] : md_entity(x.to_s) ) end
else
output.push first
end
end
return output
end
def educate(elements)
Rules.each do |reg, subst|
elements = apply_one_rule(reg, subst, elements)
end
# strips empty strings
elements.delete_if {|x| x.kind_of?(String) && x.size == 0}
final = []
# join consecutive strings
elements.each do |x|
if x.kind_of?(String) && final.last.kind_of?(String)
final.last << x
else
final << x
end
end
return final
end
end end end end

View file

@ -0,0 +1,141 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
class String
include MaRuKu::Strings
def md_type()
@md_type ||= line_md_type(self)
end
end
class NilClass
def md_type() nil end
end
# This code does the classification of lines for block-level parsing.
module MaRuKu; module Strings
def line_md_type(l)
# The order of evaluation is important (:text is a catch-all)
return :text if l =~ /^[a-zA-Z]/
return :code if number_of_leading_spaces(l)>=4
return :empty if l =~ /^\s*$/
return :footnote_text if l =~ FootnoteText
return :ref_definition if l =~ LinkRegex or l=~ IncompleteLink
return :abbreviation if l =~ Abbreviation
return :definition if l =~ Definition
# I had a bug with emails and urls at the beginning of the
# line that were mistaken for raw_html
return :text if l=~EMailAddress or l=~ URL
# raw html is like PHP Markdown Extra: at most three spaces before
return :xml_instr if l =~ %r{^\s*<\?}
return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
return :raw_html if l =~ %r{[ ]{0,3}<\!\-\-}
return :ulist if l =~ /^\s?([\*\-\+])\s+.*\w+/
return :olist if l =~ /^\s?\d+\..*\w+/
return :header1 if l =~ /^(=)+/
return :header2 if l =~ /^([-\s])+$/
return :header3 if l =~ /^(#)+\s*\S+/
# at least three asterisks on a line, and only whitespace
return :hrule if l =~ /^(\s*\*\s*){3,1000}$/
return :hrule if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens
return :hrule if l =~ /^(\s*_\s*){3,1000}$/ # or underscores
return :quote if l =~ /^>/
return :metadata if l =~ /^@/
# if @@new_meta_data?
return :ald if l =~ AttributeDefinitionList
return :ial if l =~ InlineAttributeList
# end
# return :equation_end if l =~ EquationEnd
return :text # else, it's just text
end
# $1 = id $2 = attribute list
AttributeDefinitionList = /^\s{0,3}\{([\w\d\s]+)\}:\s*(.*)\s*$/
#
InlineAttributeList = /^\s{0,3}\{(.*)\}\s*$/
# Example:
# ^:blah blah
# ^: blah blah
# ^ : blah blah
Definition = %r{
^ # begin of line
[ ]{0,3} # up to 3 spaces
: # colon
\s* # whitespace
(\S.*) # the text = $1
$ # end of line
}x
# Example:
# *[HTML]: Hyper Text Markup Language
Abbreviation = %r{
^ # begin of line
\* # one asterisk
\[ # opening bracket
([^\]]+) # any non-closing bracket: id = $1
\] # closing bracket
: # colon
\s* # whitespace
(\S.*\S)* # definition=$2
\s* # strip this whitespace
$ # end of line
}x
FootnoteText = %r{
^\s*\[(\^.+)\]: # id = $1 (including '^')
\s*(\S.*)?$ # text = $2 (not obb.)
}x
# This regex is taken from BlueCloth sources
# Link defs are in the form: ^[id]: \n? url "optional title"
LinkRegex = %r{
^[ ]*\[([^\]]+)\]: # id = $1
[ ]*
<?(\S+)>? # url = $2
[ ]*
(?:# Titles are delimited by "quotes" or (parens).
["(']
(.+?) # title = $3
[")'] # Matching ) or "
\s*(.+)? # stuff = $4
)? # title is optional
}x
IncompleteLink = %r{^\s*\[(.+)\]:\s*$}
HeaderWithId = /^(.*)\{\#([\w_-]+)\}\s*$/
HeaderWithAttributes = /^(.*)\{(.*)\}\s*$/
# if contains a pipe, it could be a table header
MightBeTableHeader = %r{\|}
# -------------:
Sep = /\s*(\:)?\s*-+\s*(\:)?\s*/
# | -------------:| ------------------------------ |
TableSeparator = %r{^(\|?#{Sep}\|?)+\s*$}
EMailAddress = /<([^:]+@[^:]+)>/
URL = /^<http:/
end end