594 lines
15 KiB
Ruby
594 lines
15 KiB
Ruby
#--
|
|
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
|
|
#
|
|
# This file is part of Maruku.
|
|
#
|
|
# Maruku is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# Maruku is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with Maruku; if not, write to the Free Software
|
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
#++
|
|
|
|
|
|
module MaRuKu; module In; module Markdown; module BlockLevelParser
|
|
|
|
include Helpers
|
|
include MaRuKu::Strings
|
|
include MaRuKu::In::Markdown::SpanLevelParser
|
|
|
|
# Array of the block-level items produced so far, with a small
# debugging helper.
class BlockContext < Array
  # Returns a human-readable dump of the trailing elements, one
  # inspected element per line.
  #
  # n - how many trailing elements to show (all of them when the context
  #     holds fewer). Defaults to 5. A negative value raises
  #     ArgumentError (from Array#last).
  def describe(n = 5)
    "Last #{n} elements: " + last(n).map { |x| "\n -" + x.inspect }.join
  end
end
|
|
|
|
# Splits the string and calls parse_lines_as_markdown
|
|
# Parses a whole document given as one string: the text is split into
# lines, wrapped in a LineSource and handed to parse_blocks, whose
# result is returned.
def parse_text_as_markdown(text)
  parse_blocks(LineSource.new(split_lines(text)))
end
|
|
|
|
# Input is a LineSource
|
|
# Runs the block-level state machine over +src+ (a LineSource) and
# returns a BlockContext holding the parsed block elements.
#
# Each iteration first offers the current line to the block extensions;
# otherwise it dispatches on the line's md_type to the matching read_*
# helper. After the loop, IALs are merged, :ial elements and :empty
# markers are dropped, and lists / definition lists whose items do not
# ask for paragraphs have their wrapping first child unwrapped.
def parse_blocks(src)
  output = BlockContext.new

  # run state machine
  while src.cur_line

    next if check_block_extensions(src, output, src.cur_line)

    # Prints detected type (useful for debugging)
    # puts "#{src.cur_line.md_type}|#{src.cur_line}"
    case src.cur_line.md_type
    when :empty
      output.push :empty
      src.ignore_line
    when :ial
      m = InlineAttributeList.match src.shift_line
      content = m[1] || ""
      src2 = CharSource.new(content, src)
      interpret_extension(src2, output, [nil])
    when :ald
      output.push read_ald(src)
    when :text
      if src.cur_line =~ MightBeTableHeader and
         (src.next_line && src.next_line =~ TableSeparator)
        output.push read_table(src)
      elsif [:header1,:header2].include? src.next_line.md_type
        output.push read_header12(src)
      elsif eventually_comes_a_def_list(src)
        definition = read_definition(src)
        # append to the current definition list if we have one
        if output.last.kind_of?(MDElement) &&
           output.last.node_type == :definition_list
          output.last.children << definition
        else
          output.push md_el(:definition_list, [definition])
        end
      else # Start of a paragraph
        output.push read_paragraph(src)
      end
    when :header2, :hrule
      # hrule
      src.shift_line
      output.push md_hrule()
    when :header3
      output.push read_header3(src)
    when :ulist, :olist
      list_type = src.cur_line.md_type == :ulist ? :ul : :ol
      li = read_list_item(src)
      # append to current list if we have one
      if output.last.kind_of?(MDElement) &&
         output.last.node_type == list_type
        output.last.children << li
      else
        output.push md_el(list_type, [li])
      end
    when :quote;    output.push read_quote(src)
    when :code;     e = read_code(src); output << e if e
    when :raw_html; e = read_raw_html(src); output << e if e

    when :footnote_text;  output.push read_footnote_text(src)
    when :ref_definition; output.push read_ref_definition(src)
    when :abbreviation;   output.push read_abbreviation(src)
    when :xml_instr;      read_xml_instruction(src, output)
    when :metadata
      maruku_error "Please use the new meta-data syntax: \n"+
        " http://maruku.rubyforge.org/proposal.html\n", src
      src.ignore_line
    else # warn if we forgot something
      md_type = src.cur_line.md_type
      line = src.cur_line
      maruku_error "Ignoring line '#{line}' type = #{md_type}", src
      src.shift_line
    end
  end

  merge_ial(output, src, output)
  output.delete_if {|x| x.kind_of?(MDElement) &&
    x.node_type == :ial}

  # get rid of empty line markers
  output.delete_if {|x| x == :empty}

  # See for each list if we can omit the paragraphs and use li_span
  # TODO: do this after
  output.each do |c|
    # A reader may have pushed nil after reporting an error (read_ald
    # does); such entries have no node_type and are skipped here.
    next unless c.respond_to?(:node_type)

    # Remove paragraphs that we can get rid of
    if [:ul,:ol].include? c.node_type
      if c.children.all? {|li| !li.want_my_paragraph}
        c.children.each do |d|
          d.node_type = :li_span
          # An item with no parsed content has nothing to unwrap.
          first_child = d.children[0]
          d.children = first_child.children if first_child
        end
      end
    end
    if c.node_type == :definition_list
      if c.children.all? {|defi| !defi.want_my_paragraph}
        c.children.each do |definition|
          definition.definitions.each do |dd|
            # Same guard as for list items: skip empty definitions.
            first_child = dd.children[0]
            dd.children = first_child.children if first_child
          end
        end
      end
    end
  end

  output
end
|
|
|
|
|
|
|
|
# Consumes one line from +src+ and parses it as an attribute list
# definition. On success the parsed attribute list is stored in
# self.ald under its id and an ALD element is returned. When the line
# does not match AttributeDefinitionList an error is reported and nil
# is returned.
def read_ald(src)
  line = src.shift_line
  unless line =~ AttributeDefinitionList
    maruku_error "Bug Bug:\n#{line.inspect}"
    return nil
  end

  id, raw_attributes = $1, $2
  attributes = read_attribute_list(CharSource.new(raw_attributes, src), nil, [nil])
  self.ald[id] = attributes
  md_ald(id, attributes)
end
|
|
|
|
# reads a header (with ----- or ========)
|
|
# Reads a two-line header: the title line followed by an underline
# line. The level is 2 when the underline line has type :header2 and
# 1 otherwise. With the new meta-data syntax enabled, a trailing
# "{...}" on the title is parsed as an attribute list.
def read_header12(src)
  title = src.shift_line.strip
  attributes = nil

  # Check if there is an IAL
  if new_meta_data? && title =~ /^(.*)\{(.*)\}\s*$/
    title, raw_ial = $1.strip, $2
    attributes = read_attribute_list(CharSource.new(raw_ial, src), nil, [nil])
  end

  spans = parse_lines_as_span([title])
  underline_type = src.cur_line.md_type
  src.shift_line # consume the underline
  md_header(underline_type == :header2 ? 2 : 1, spans, attributes)
end
|
|
|
|
# reads a header like '#### header ####'
|
|
# Reads a one-line header written with leading hashes. The level is
# the number of leading hashes. With the new meta-data syntax enabled,
# a trailing "{...}" is parsed as an attribute list.
def read_header3(src)
  raw = src.shift_line.strip
  attributes = nil

  # Check if there is an IAL
  if new_meta_data? && (ial_match = /^(.*)\{(.*)\}\s*$/.match(raw))
    raw = ial_match[1].strip
    attributes = read_attribute_list(CharSource.new(ial_match[2], src), nil, [nil])
  end

  level = num_leading_hashes(raw)
  spans = parse_lines_as_span([strip_hashes(raw)])
  md_header(level, spans, attributes)
end
|
|
|
|
# Reads an XML processing instruction ("<?target ... ?>"), which may
# span several lines, and appends the result to +output+.
#
# src    - line source positioned on the line that opens the instruction.
# output - collection the resulting element(s) are pushed onto.
#
# When the target is "mrk" and MaRuKu::Globals[:unsafe_features] is set,
# the code is run through safe_execute_code and its (non-String) result
# is pushed; otherwise an xml-instruction element is pushed.
#
# Raises RuntimeError ("BugBug") if the first line is not an instruction,
# and "Not expected" if executed code returns a String.
def read_xml_instruction(src, output)
  m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
  raise "BugBug" if not m
  target = m[2] || ''
  code = m[3]

  # Keep appending lines until the closing "?>" shows up, but never read
  # past the end of the input.
  while code !~ /\?>/ && src.cur_line
    code += "\n" + src.shift_line
  end

  if code !~ /\?>/
    # Input ended before the instruction was closed: report it and keep
    # what was read instead of failing on a missing line.
    maruku_error("Unterminated processing instruction (missing '?>'):\n" +
      add_tabs(code, 1, '|'), src)
  elsif code !~ /\?>\s*$/
    garbage = /\?>(.*)$/.match(code)[1]
    maruku_error("Trailing garbage on last line: #{garbage.inspect}:\n" +
      add_tabs(code, 1, '|'), src)
  end
  code = code.gsub(/\?>\s*$/, '')

  if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
    result = safe_execute_code(self, code)
    if result
      raise "Not expected" if result.kind_of? String
      output.push(*result)
    end
  else
    output.push md_xml_instr(target, code)
  end
end
|
|
|
|
# Reads a block of raw HTML: lines are fed to an HTMLHelper until it
# reports it is finished or the source runs out. Whatever the helper
# read is returned wrapped in an html element; an error raised while
# reading is reported through maruku_error.
def read_raw_html(src)
  helper = HTMLHelper.new
  begin
    helper.eat_this(src.shift_line)
    # puts "\nBLOCK:\nhtml -> #{l.inspect}"
    while src.cur_line && !helper.is_finished?
      helper.eat_this("\n" + src.shift_line)
    end
  # NOTE(review): `Exception` is resolved lexically, so if an enclosing
  # module defines its own Exception class only that one is rescued
  # here -- confirm before narrowing or widening this clause.
  rescue Exception => e
    trace = e.inspect + e.backtrace.join("\n")
    maruku_error "Bad block-level HTML:\n#{add_tabs(trace,1,'|')}\n", src
  end
  md_html(helper.stuff_you_read)
end
|
|
|
|
# Collects the lines of one paragraph from +src+ and returns them as a
# paragraph element with span-parsed children.
#
# Collection stops, without consuming the line, when the current line:
# * has one of the block types listed in +stoppers+;
# * is a list line followed by a line of the same list type;
# * is blank once stripped;
# * is followed by a :header1 / :header2 line;
# * matches a block extension.
def read_paragraph(src)
  stoppers = [:quote, :header3, :empty, :raw_html, :ref_definition, :ial, :xml_instr]
  collected = []

  while (current = src.cur_line)
    kind = current.md_type
    break if stoppers.include?(kind)
    # :olist does not break, unless another item of the same kind follows
    break if (kind == :olist || kind == :ulist) && src.next_line.md_type == kind
    break if current.strip.size == 0
    break if [:header1, :header2].include?(src.next_line.md_type)
    break if any_matching_block_extension?(current)

    collected << src.shift_line
  end
  # dbg_describe_ary(collected, 'PAR')

  md_par(parse_lines_as_span(collected, src))
end
|
|
|
|
# Reads one list item, either ordered or unordered.
|
|
# Reads one list item (ordered or unordered): the marker line plus its
# indented continuation, parsed recursively as blocks. The item is
# flagged as wanting a paragraph when read_indented_content says so or
# when it contains more than one block.
def read_list_item(src)
  start_index = src.cur_index
  kind = src.cur_line.md_type
  marker_line = src.shift_line

  # Ugly things going on inside `read_indented_content`
  indent = spaces_before_first_char(marker_line)
  body, wants_par = read_indented_content(src, indent, [:ulist, :olist, :ial], kind)

  # Put the first line back in front, stripped of its '*', '-', '+' marker
  body.unshift(marker_line[indent, marker_line.size - 1])

  #dbg_describe_ary(body, 'LIST ITEM ')

  blocks = parse_blocks(LineSource.new(body, src, start_index))
  md_li(blocks, wants_par || (blocks.size > 1))
end
|
|
|
|
# Consumes one abbreviation-definition line, records it in
# self.abbreviations and returns the matching element. Errors are
# reported (not raised here) when the line does not match Abbreviation
# or the abbreviation is missing or empty.
def read_abbreviation(src)
  line = src.shift_line
  unless line =~ Abbreviation
    maruku_error "Bug: it's Andrea's fault. Tell him.\n#{line.inspect}"
  end

  abbr, desc = $1, $2

  if abbr.nil? || abbr.size == 0
    maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
  end

  self.abbreviations[abbr] = desc

  md_abbr_def(abbr, desc)
end
|
|
|
|
# Reads a footnote definition: the first line (matched against
# FootnoteText for the id and leading text) plus its continuation
# lines, read with a fixed indentation of 4. The body is parsed
# recursively as blocks; the resulting footnote element is stored in
# self.footnotes under its id and returned.
def read_footnote_text(src)
  start_index = src.cur_index
  opening = src.shift_line

  maruku_error "Bug (it's Andrea's fault)" unless opening =~ FootnoteText
  id, text = $1, $2

  # Ugly things going on inside `read_indented_content`
  indentation = 4 #opening.size-text.size
  # puts "id =_#{id}_; text=_#{text}_ indent=#{indentation}"

  body, _wants_par =
    read_indented_content(src, indentation, [:footnote_text], :footnote_text)

  # add first line, unless it is blank
  body.unshift(text) if text && text.strip != ""

  # dbg_describe_ary(body, 'FOOTNOTE')
  blocks = parse_blocks(LineSource.new(body, src, start_index))

  footnote = md_footnote(id, blocks)
  self.footnotes[id] = footnote
  footnote
end
|
|
|
|
|
|
# This is the only ugly function in the code base.
|
|
# It is used to read list items, descriptions, footnote text
|
|
# Collects the continuation lines of a list item, definition or
# footnote, with +indentation+ stripped from each collected line.
#
# src         - line source positioned just after the item's first line.
# indentation - indent passed to strip_indent, and the minimum number of
#               leading spaces required after a blank line.
# break_list  - line types that end the item while no blank line has
#               been seen yet.
# item_type   - line type of the enclosing item; a sibling of this type
#               right after a blank line makes the item want a paragraph.
#
# Returns [lines, want_my_paragraph]; trailing empty lines are dropped.
def read_indented_content(src, indentation, break_list, item_type)
  collected = []
  blank_seen = false
  content_after_blank = false

  while (current = src.cur_line)
    #puts "#{current.md_type} #{current.inspect}"
    if current.md_type == :empty
      blank_seen = true
      collected << src.shift_line
      next
    end

    if blank_seen
      # after a white line we expect things to be properly aligned
      break if number_of_leading_spaces(current) < indentation
      content_after_blank = true
    elsif break_list.include?(current.md_type)
      break
    end

    dedented = strip_indent(src.shift_line, indentation)
    collected << dedented
    #puts "Accepted as #{dedented.inspect}"

    # You are only required to indent the first line of
    # a child paragraph.
    next unless dedented.md_type == :text
    while src.cur_line && (src.cur_line.md_type == :text)
      collected << strip_indent(src.shift_line, indentation)
    end
  end

  want_my_paragraph = content_after_blank ||
    (blank_seen && (src.cur_line && (src.cur_line.md_type == item_type)))

  # dbg_describe_ary(collected, 'LI')

  # drop trailing empty lines
  collected.pop while collected.last && (collected.last.md_type == :empty)

  return collected, want_my_paragraph
end
|
|
|
|
|
|
# Reads a run of consecutive :quote lines, passes each through unquote,
# parses the result recursively as blocks and returns it wrapped in a
# quote element.
def read_quote(src)
  start_index = src.cur_index

  unquoted = []
  unquoted << unquote(src.shift_line) while src.cur_line && src.cur_line.md_type == :quote
  # dbg_describe_ary(unquoted, 'QUOTE')

  md_quote(parse_blocks(LineSource.new(unquoted, src, start_index)))
end
|
|
|
|
# Reads a code block: every consecutive :code or :empty line, each
# passed through strip_indent with an indent of 4. Blank lines at both
# ends are trimmed. Returns a codeblock element, or nil when nothing
# but blank lines was read.
def read_code(src)
  body = []
  while src.cur_line && [:code, :empty].include?(src.cur_line.md_type)
    body << strip_indent(src.shift_line, 4)
  end

  # trim blank lines at the end, then at the start
  body.pop while body.last && body.last.strip.size == 0
  body.shift while body.first && body.first.strip.size == 0

  return nil if body.empty?

  # dbg_describe_ary(body, 'CODE')
  md_codeblock(body.join("\n"))
end
|
|
|
|
# Reads a series of metadata lines with empty lines in between
|
|
# Reads consecutive :metadata lines, skipping :empty lines in between,
# and merges what parse_metadata returns for each into a single hash.
# Stops, without consuming it, at the first line of any other type.
def read_metadata(src)
  collected = {}
  while (line = src.cur_line)
    type = line.md_type
    if type == :empty
      src.shift_line
    elsif type == :metadata
      collected.merge!(parse_metadata(src.shift_line))
    else
      break
    end
  end
  collected
end
|
|
|
|
|
|
# Reads a link reference definition, records it in self.refs under the
# stripped, lower-cased id and returns the matching element.
#
# The definition may continue on the next line when that line is not
# itself a reference definition and starts with 1-3 spaces. Anything
# after the title is treated as whitespace-separated key=value pairs
# (value optionally double-quoted) and stored next to :url and :title.
#
# Returns nil, after reporting through maruku_error, when the line does
# not match LinkRegex.
def read_ref_definition(src)
  line = src.shift_line

  # if link is incomplete, shift next line
  if src.cur_line && (src.cur_line.md_type != :ref_definition) &&
     ([1,2,3].include? number_of_leading_spaces(src.cur_line))
    line += " " + src.shift_line
  end

  # puts "total= #{line}"

  match = LinkRegex.match(line)
  unless match
    # Report like the other readers do and stop: there is nothing to
    # extract from a nil match.
    maruku_error "Link does not respect format: '#{line}'", src
    return nil
  end

  id = match[1].strip.downcase
  url = match[2]
  title = match[3]

  hash = self.refs[id] = {:url=>url,:title=>title}

  stuff = match[4]
  if stuff
    stuff.split.each do |couple|
      # Split on the first '=' only, so values may themselves contain '='.
      k, v = couple.split('=', 2)
      next if k.nil? || k.empty? # a pair without a key carries nothing
      v ||= ""
      # Strip surrounding quotes; a lone '"' becomes the empty string.
      if v[0,1]=='"' then v = v[1, v.size-2] || "" end
      # puts "key:_#{k}_ value=_#{v}_"
      hash[k.to_sym] = v
    end
  end
  # puts hash.inspect

  md_ref_def(id, url, {:title=>title})
end
|
|
|
|
# Splits one table line on '|' and returns its cells stripped of
# surrounding whitespace, with empty cells dropped.
def split_cells(s)
  s.strip.split('|').map { |x| x.strip }.reject { |x| x.empty? }
end

# Reads a table: a header line, a separator line that fixes the number
# of columns and their alignment, then every following line that
# contains a '|'.
#
# Returns a :table element whose children are the head cells followed
# by the row cells (flattened), with the alignments under :align. When
# the header or any row does not have as many cells as the separator,
# an error is reported and a line-break element is returned instead.
def read_table(src)
  head = split_cells(src.shift_line).map { |s| md_el(:head_cell, parse_lines_as_span([s])) }

  separator = split_cells(src.shift_line)

  # Sep's two captures flag the left / right colon of a separator cell.
  align = separator.map do |s|
    s =~ Sep
    if $1 and $2 then :center elsif $2 then :right else :left end
  end

  num_columns = align.size

  if head.size != num_columns
    maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
    tell_user "I will ignore this table."
    # XXX try to recover
    return md_br()
  end

  rows = []

  while src.cur_line && src.cur_line =~ /\|/
    row = split_cells(src.shift_line).map { |s| md_el(:cell, parse_lines_as_span([s])) }
    # Check the row itself; cells are flattened below, so a row of the
    # wrong width would shift every following row.
    if row.size != num_columns
      maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}"
      tell_user "I will ignore this table."
      # XXX try to recover
      return md_br()
    end
    rows << row
  end

  children = (head + rows).flatten
  md_el(:table, children, {:align => align})
end
|
|
|
|
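# If the current line is text, decide whether a definition list is coming.
# The look-ahead string must start with one or more `t`, an optional `e`,
# then a `d` (presumably text, empty and definition lines -- confirm
# against LineSource#tell_me_the_future).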
|
|
|
|
# Matches the look-ahead string from src.tell_me_the_future against
# /^t+e?d/. Returns the match position (truthy) when it matches,
# nil otherwise.
def eventually_comes_a_def_list(src)
  # puts "future: #{src.tell_me_the_future}"
  src.tell_me_the_future =~ %r{^t+e?d}x
end
|
|
|
|
|
|
# Reads one definition-list entry: one or more term lines, an optional
# empty line, then one or more definitions, each with its indented
# continuation parsed recursively as blocks.
#
# Returns a :definition element whose children are the terms followed
# by the definitions; both are also stored, together with
# :want_my_paragraph, in the element's attributes.
#
# Raises RuntimeError ("Chunky Bacon!") when the input ends right after
# the terms, or when no definition line follows them.
def read_definition(src)
  # Read one or more terms
  terms = []
  while src.cur_line && src.cur_line.md_type == :text
    terms << md_el(:definition_term, parse_lines_as_span([src.shift_line]))
  end
  # dbg_describe_ary(terms, 'DT')

  raise "Chunky Bacon!" unless src.cur_line

  # one optional empty line; seeing it makes the entry want paragraphs
  want_my_paragraph = false
  if src.cur_line.md_type == :empty
    want_my_paragraph = true
    src.shift_line
  end

  raise "Chunky Bacon!" unless src.cur_line.md_type == :definition

  # Read one or more definitions
  definitions = []
  while src.cur_line && src.cur_line.md_type == :definition
    start_index = src.cur_index

    opening = src.shift_line
    opening =~ Definition
    opening = $1

    # I know, it's ugly!!!
    body, body_wants_par =
      read_indented_content(src, 4, [:definition], :definition)
    want_my_paragraph ||= body_wants_par

    body.unshift opening

    # dbg_describe_ary(body, 'DD')
    blocks = parse_blocks(LineSource.new(body, src, start_index))
    definitions << md_el(:definition_data, blocks)
  end

  attributes = {
    :terms => terms,
    :definitions => definitions,
    :want_my_paragraph => want_my_paragraph}
  md_el(:definition, terms + definitions, attributes)
end
|
|
end # BlockLevelParser
|
|
end # MaRuKu
|
|
end
|
|
end |