#--
#   Copyright (C) 2006  Andrea Censi
#
# This file is part of Maruku.
#
#   Maruku is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   Maruku is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with Maruku; if not, write to the Free Software
#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#++

# Block-level Markdown parser: consumes a line source one block at a time
# (paragraphs, headers, lists, quotes, code, tables, definitions, ...) and
# builds the corresponding list of elements.
module MaRuKu; module In; module Markdown; module BlockLevelParser

  include Helpers
  include MaRuKu::Strings
  include MaRuKu::In::Markdown::SpanLevelParser

  # Array of the elements parsed so far; `describe` is a debugging aid.
  class BlockContext < Array
    # Returns a string listing (via `inspect`) at most the last 5 elements.
    def describe
      n = 5
      desc = size > n ? self[-n, n] : self
      "Last #{n} elements: " +
        desc.map { |x| "\n -" + x.inspect }.join
    end
  end

  # Splits the string into lines, wraps them in a LineSource and
  # parses them with `parse_blocks`.
  def parse_text_as_markdown(text)
    lines = split_lines(text)
    src = LineSource.new(lines)
    return parse_blocks(src)
  end

  # Parses every remaining line of `src` (a LineSource) into block elements.
  #
  # Dispatches on the `md_type` of the current line, delegating to the
  # `read_*` methods, then post-processes the result: IALs are merged,
  # `:empty` markers are dropped and lists / definition lists whose items
  # do not need a paragraph have that paragraph unwrapped.
  #
  # Returns a BlockContext with the parsed elements.
  def parse_blocks(src)
    output = BlockContext.new

    # run state machine
    while src.cur_line

      next if check_block_extensions(src, output, src.cur_line)

      # Prints detected type (useful for debugging)
      # puts "#{src.cur_line.md_type}|#{src.cur_line}"
      case src.cur_line.md_type
      when :empty
        output.push :empty
        src.ignore_line
      when :ial
        m = InlineAttributeList.match src.shift_line
        content = m[1] || ""
        src2 = CharSource.new(content, src)
        interpret_extension(src2, output, [nil])
      when :ald
        # read_ald returns nil when the line cannot be parsed: do not
        # push nil, the post-processing below calls node_type on every
        # element.
        e = read_ald(src)
        output << e if e
      when :text
        if src.cur_line =~ MightBeTableHeader &&
           (src.next_line && src.next_line =~ TableSeparator)
          output.push read_table(src)
        elsif [:header1, :header2].include? src.next_line.md_type
          output.push read_header12(src)
        elsif eventually_comes_a_def_list(src)
          definition = read_definition(src)
          # append to the current definition list if we have one
          if output.last.kind_of?(MDElement) &&
             output.last.node_type == :definition_list
            output.last.children << definition
          else
            output.push md_el(:definition_list, [definition])
          end
        else
          # Start of a paragraph
          output.push read_paragraph(src)
        end
      when :header2, :hrule
        # A :header2 line that was not consumed by read_header12 is
        # rendered as a horizontal rule, like :hrule.
        src.shift_line
        output.push md_hrule()
      when :header3
        output.push read_header3(src)
      when :ulist, :olist
        list_type = src.cur_line.md_type == :ulist ? :ul : :ol
        li = read_list_item(src)
        # append to current list if we have one
        if output.last.kind_of?(MDElement) &&
           output.last.node_type == list_type
          output.last.children << li
        else
          output.push md_el(list_type, [li])
        end
      when :quote
        output.push read_quote(src)
      when :code
        e = read_code(src)
        output << e if e
      when :raw_html
        e = read_raw_html(src)
        output << e if e
      when :footnote_text
        output.push read_footnote_text(src)
      when :ref_definition
        output.push read_ref_definition(src)
      when :abbreviation
        output.push read_abbreviation(src)
      when :xml_instr
        read_xml_instruction(src, output)
      when :metadata
        maruku_error "Please use the new meta-data syntax: \n" +
          " http://maruku.rubyforge.org/proposal.html\n", src
        src.ignore_line
      else
        # warn if we forgot something
        md_type = src.cur_line.md_type
        line = src.cur_line
        maruku_error "Ignoring line '#{line}' type = #{md_type}", src
        src.shift_line
      end
    end

    merge_ial(output, src, output)
    output.delete_if { |x| x.kind_of?(MDElement) && x.node_type == :ial }

    # get rid of empty line markers
    output.delete_if { |x| x == :empty }

    # See for each list if we can omit the paragraphs and use li_span
    # TODO: do this after
    output.each do |c|
      # Remove paragraphs that we can get rid of
      if [:ul, :ol].include? c.node_type
        if c.children.all? { |li| !li.want_my_paragraph }
          c.children.each do |d|
            d.node_type = :li_span
            d.children = d.children[0].children
          end
        end
      end
      if c.node_type == :definition_list
        if c.children.all? { |defi| !defi.want_my_paragraph }
          c.children.each do |definition|
            definition.definitions.each do |dd|
              dd.children = dd.children[0].children
            end
          end
        end
      end
    end

    output
  end

  # Reads an attribute definition list line, stores the parsed attribute
  # list in `self.ald` under its id and returns the corresponding element.
  # Returns nil (after reporting an error) if the line does not match
  # AttributeDefinitionList.
  def read_ald(src)
    if (l = src.shift_line) =~ AttributeDefinitionList
      id = $1
      al = read_attribute_list(CharSource.new($2, src), nil, [nil])
      self.ald[id] = al
      return md_ald(id, al)
    else
      maruku_error "Bug Bug:\n#{l.inspect}"
      return nil
    end
  end

  # reads a header (with ----- or ========)
  #
  # The first line is the header text (optionally followed by an IAL in
  # curly braces when the new meta-data syntax is enabled); the second
  # line is the underline, whose type decides the level (1 or 2).
  def read_header12(src)
    line = src.shift_line.strip
    al = nil
    # Check if there is an IAL
    if new_meta_data? && line =~ /^(.*)\{(.*)\}\s*$/
      line = $1.strip
      ial = $2
      al = read_attribute_list(CharSource.new(ial, src), nil, [nil])
    end
    text = parse_lines_as_span [line]
    level = src.cur_line.md_type == :header2 ? 2 : 1
    src.shift_line
    return md_header(level, text, al)
  end

  # reads a header like '#### header ####'
  #
  # The level is the number of leading hashes; an optional trailing IAL
  # in curly braces is parsed when the new meta-data syntax is enabled.
  def read_header3(src)
    line = src.shift_line.strip
    al = nil
    # Check if there is an IAL
    if new_meta_data? && line =~ /^(.*)\{(.*)\}\s*$/
      line = $1.strip
      ial = $2
      al = read_attribute_list(CharSource.new(ial, src), nil, [nil])
    end
    level = num_leading_hashes(line)
    text = parse_lines_as_span [strip_hashes(line)]
    return md_header(level, text, al)
  end

  # Reads an XML processing instruction (`<?target code ?>`), possibly
  # spanning several lines, and appends the result to `output`.
  #
  # When the target is 'mrk' and the :unsafe_features global is set, the
  # code is run through `safe_execute_code` and its (non-String) result
  # is appended; otherwise an xml_instr element is appended.
  def read_xml_instruction(src, output)
    m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
    raise "BugBug" unless m
    target = m[2] || ''
    code = m[3]
    until code =~ /\?>/
      # Do not read past the end of the input when the instruction is
      # never closed: report the problem and keep what we have.
      unless src.cur_line
        maruku_error "XML instruction is not terminated by '?>':\n" +
          add_tabs(code, 1, '|'), src
        break
      end
      code += "\n" + src.shift_line
    end
    unless code =~ /\?>\s*$/
      trailing = /\?>(.*)$/.match(code)
      # `trailing` is nil only for an unterminated instruction, which
      # has already been reported above.
      if trailing
        garbage = trailing[1]
        maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n" +
          add_tabs(code, 1, '|'), src
      end
    end
    code.gsub!(/\?>\s*$/, '')

    if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
      result = safe_execute_code(self, code)
      if result
        if result.kind_of? String
          raise "Not expected"
        else
          output.push(*result)
        end
      end
    else
      output.push md_xml_instr(target, code)
    end
  end

  # Reads a block of raw HTML: lines are fed to an HTMLHelper until it
  # reports that it is finished (or the input ends). Failures are reported
  # through maruku_error; whatever was read is returned as an html element.
  def read_raw_html(src)
    h = HTMLHelper.new
    begin
      h.eat_this(src.shift_line)
      while src.cur_line && !h.is_finished?
        l = src.shift_line
        h.eat_this "\n" + l
      end
    # NOTE(review): rescuing Exception also traps interrupts and exits;
    # StandardError is probably enough -- confirm what HTMLHelper raises.
    rescue Exception => e
      ex = e.inspect + e.backtrace.join("\n")
      maruku_error "Bad block-level HTML:\n#{add_tabs(ex,1,'|')}\n", src
    end
    raw_html = h.stuff_you_read
    return md_html(raw_html)
  end

  # Reads the lines of a paragraph, stopping at the first line that starts
  # another kind of block, and returns a paragraph element.
  def read_paragraph(src)
    lines = []
    while src.cur_line
      # A list marker breaks the paragraph only when the following line
      # is a list item of the same type.
      case t = src.cur_line.md_type
      when :quote, :header3, :empty, :raw_html, :ref_definition, :ial, :xml_instr
        break
      when :olist, :ulist
        break if src.next_line.md_type == t
      end
      break if src.cur_line.strip.size == 0
      break if [:header1, :header2].include? src.next_line.md_type
      break if any_matching_block_extension?(src.cur_line)

      lines << src.shift_line
    end
    # dbg_describe_ary(lines, 'PAR')
    children = parse_lines_as_span(lines, src)
    return md_par(children)
  end

  # Reads one list item, either ordered or unordered.
  #
  # The item content is parsed recursively with `parse_blocks`; the item
  # wants a paragraph if `read_indented_content` says so or if it holds
  # more than one child.
  def read_list_item(src)
    parent_offset = src.cur_index

    item_type = src.cur_line.md_type
    first = src.shift_line

    # Ugly things going on inside `read_indented_content`
    indentation = spaces_before_first_char(first)
    break_list = [:ulist, :olist, :ial]
    lines, want_my_paragraph =
      read_indented_content(src, indentation, break_list, item_type)

    # add first line
    # Strip first '*', '-', '+' from first line
    stripped = first[indentation, first.size - 1]
    lines.unshift stripped

    # dbg_describe_ary(lines, 'LIST ITEM ')

    src2 = LineSource.new(lines, src, parent_offset)
    children = parse_blocks(src2)
    with_par = want_my_paragraph || (children.size > 1)

    return md_li(children, with_par)
  end

  # Reads an abbreviation definition, records it in `self.abbreviations`
  # and returns the corresponding element.
  def read_abbreviation(src)
    unless (l = src.shift_line) =~ Abbreviation
      maruku_error "Bug: it's Andrea's fault. Tell him.\n#{l.inspect}"
    end

    abbr = $1
    desc = $2

    if (not abbr) or (abbr.size == 0)
      maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
    end

    self.abbreviations[abbr] = desc

    return md_abbr_def(abbr, desc)
  end

  # Reads the text of a footnote (first line plus indented continuation),
  # parses it recursively, records it in `self.footnotes` under its id and
  # returns the footnote element.
  def read_footnote_text(src)
    parent_offset = src.cur_index

    first = src.shift_line

    unless first =~ FootnoteText
      maruku_error "Bug (it's Andrea's fault)"
    end

    id = $1
    text = $2

    # Ugly things going on inside `read_indented_content`
    indentation = 4 # first.size-text.size

    # puts "id =_#{id}_; text=_#{text}_ indent=#{indentation}"

    break_list = [:footnote_text]
    item_type = :footnote_text
    lines, _want_my_paragraph =
      read_indented_content(src, indentation, break_list, item_type)

    # add first line
    if text && text.strip != ""
      lines.unshift text
    end

    # dbg_describe_ary(lines, 'FOOTNOTE')
    src2 = LineSource.new(lines, src, parent_offset)
    children = parse_blocks(src2)

    e = md_footnote(id, children)
    self.footnotes[id] = e
    return e
  end

  # This is the only ugly function in the code base.
  # It is used to read list items, descriptions, footnote text
  #
  # Collects the lines belonging to the current item, stripping
  # `indentation` from each of them. Before any empty line, a line whose
  # type is in `break_list` ends the item; after an empty line, a line
  # indented less than `indentation` ends it.
  #
  # Returns `[lines, want_my_paragraph]`, where want_my_paragraph is true
  # if content followed an empty line, or if an empty line was seen and the
  # next line is another item of type `item_type`.
  def read_indented_content(src, indentation, break_list, item_type)
    lines = []
    # collect all indented lines
    saw_empty = false
    saw_anything_after = false
    while src.cur_line
      # puts "#{src.cur_line.md_type} #{src.cur_line.inspect}"
      if src.cur_line.md_type == :empty
        saw_empty = true
        lines << src.shift_line
        next
      end

      # after a white line
      if saw_empty
        # we expect things to be properly aligned
        break if number_of_leading_spaces(src.cur_line) < indentation
        saw_anything_after = true
      else
        break if break_list.include? src.cur_line.md_type
        # break if src.cur_line.md_type != :text
      end

      stripped = strip_indent(src.shift_line, indentation)
      lines << stripped

      # puts "Accepted as #{stripped.inspect}"

      # You are only required to indent the first line of
      # a child paragraph.
      if stripped.md_type == :text
        while src.cur_line && (src.cur_line.md_type == :text)
          lines << strip_indent(src.shift_line, indentation)
        end
      end
    end

    want_my_paragraph = saw_anything_after ||
      (saw_empty && (src.cur_line && (src.cur_line.md_type == item_type)))

    # dbg_describe_ary(lines, 'LI')
    # drop trailing empty lines
    while lines.last && (lines.last.md_type == :empty)
      lines.pop
    end

    return lines, want_my_paragraph
  end

  # Reads consecutive :quote lines, unquotes them and parses the result
  # recursively into a quote element.
  def read_quote(src)
    parent_offset = src.cur_index

    lines = []
    # collect all quoted lines
    while src.cur_line && src.cur_line.md_type == :quote
      lines << unquote(src.shift_line)
    end
    # dbg_describe_ary(lines, 'QUOTE')

    src2 = LineSource.new(lines, src, parent_offset)
    children = parse_blocks(src2)
    return md_quote(children)
  end

  # Reads a code block (consecutive :code / :empty lines, with an indent
  # of 4 stripped). Leading and trailing blank lines are discarded.
  # Returns nil if nothing is left.
  def read_code(src)
    # collect all indented lines
    lines = []
    while src.cur_line && ([:code, :empty].include? src.cur_line.md_type)
      lines << strip_indent(src.shift_line, 4)
    end

    # while lines.last && (lines.last.md_type == :empty )
    while lines.last && lines.last.strip.size == 0
      lines.pop
    end

    while lines.first && lines.first.strip.size == 0
      lines.shift
    end

    return nil if lines.empty?

    source = lines.join("\n")

    # dbg_describe_ary(lines, 'CODE')

    return md_codeblock(source)
  end

  # Reads a series of metadata lines with empty lines in between
  # and returns the merged hash.
  def read_metadata(src)
    hash = {}
    while src.cur_line
      case src.cur_line.md_type
      when :empty
        src.shift_line
      when :metadata
        hash.merge! parse_metadata(src.shift_line)
      else
        break
      end
    end
    hash
  end

  # Reads a link reference definition, stores its url, title and any extra
  # `key=value` attributes in `self.refs` under the normalized id (stripped,
  # downcased, spaces replaced by underscores) and returns the element.
  def read_ref_definition(src)
    line = src.shift_line

    # if link is incomplete, shift next line
    if src.cur_line && (src.cur_line.md_type != :ref_definition) &&
       ([1, 2, 3].include? number_of_leading_spaces(src.cur_line))
      line += " " + src.shift_line
    end

    # puts "total= #{line}"

    match = LinkRegex.match(line)
    if not match
      # NOTE(review): `error` is not defined in the part of the code
      # visible here, and execution would continue with a nil match --
      # confirm whether maruku_error plus an early return is intended.
      error "Link does not respect format: '#{line}'"
    end

    id = match[1]
    url = match[2]
    title = match[3]
    id = id.strip.downcase.gsub(' ', '_')

    hash = self.refs[id] = {:url => url, :title => title}

    stuff = match[4]

    if stuff
      stuff.split.each do |couple|
        # puts "found #{couple}"
        k, v = couple.split('=')
        v ||= ""
        # remove the surrounding quotes
        if v[0, 1] == '"' then v = v[1, v.size - 2] end
        # puts "key:_#{k}_ value=_#{v}_"
        hash[k.to_sym] = v
      end
    end
    # puts hash.inspect

    return md_ref_def(id, url, {:title => title})
  end

  # Splits a table line on '|' and returns the stripped, non-blank cells.
  def split_cells(s)
    s.strip.split('|').select { |x| x.strip.size > 0 }.map { |x| x.strip }
  end

  # Reads a table: a head line, a separator line (which gives the column
  # alignments) and every following line containing a '|'.
  #
  # If the head or any row does not have as many cells as the separator,
  # an error is reported and a line break element is returned instead of
  # the table.
  def read_table(src)
    head = split_cells(src.shift_line).map { |s|
      md_el(:head_cell, parse_lines_as_span([s]))
    }

    separator = split_cells(src.shift_line)

    align = separator.map { |s|
      s =~ Sep
      if $1 and $2 then :center elsif $2 then :right else :left end
    }

    num_columns = align.size

    if head.size != num_columns
      maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
      tell_user "I will ignore this table."
      # XXX try to recover
      return md_br()
    end

    rows = []

    while src.cur_line && src.cur_line =~ /\|/
      row = split_cells(src.shift_line).map { |s|
        md_el(:cell, parse_lines_as_span([s]))
      }
      # Check the row itself (the head has already been validated above).
      if row.size != num_columns
        maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}"
        tell_user "I will ignore this table."
        # XXX try to recover
        return md_br()
      end
      rows << row
    end

    children = (head + rows).flatten
    return md_el(:table, children, {:align => align})
  end

  # If current line is text, a definition list is coming
  # if 1) text,empty,[text,empty]*,definition
  #
  # Returns a truthy value when the string returned by
  # `src.tell_me_the_future` starts with one or more 't', an optional 'e'
  # and then a 'd'.
  def eventually_comes_a_def_list(src)
    future = src.tell_me_the_future
    ok = future =~ %r{^t+e?d}x
    # puts "future: #{future} - #{ok}"
    ok
  end

  # Reads one definition: one or more term lines, an optional empty line,
  # then one or more definition blocks, each parsed recursively.
  #
  # Raises if the input ends after the terms or if no :definition line
  # follows them.
  def read_definition(src)
    # Read one or more terms
    terms = []
    while src.cur_line && src.cur_line.md_type == :text
      terms << md_el(:definition_term, parse_lines_as_span([src.shift_line]))
    end

    # dbg_describe_ary(terms, 'DT')

    want_my_paragraph = false

    raise "Chunky Bacon!" if not src.cur_line

    # one optional empty
    if src.cur_line.md_type == :empty
      want_my_paragraph = true
      src.shift_line
    end

    raise "Chunky Bacon!" if src.cur_line.md_type != :definition

    # Read one or more definitions
    definitions = []
    while src.cur_line && src.cur_line.md_type == :definition
      parent_offset = src.cur_index

      first = src.shift_line
      first =~ Definition
      first = $1

      # I know, it's ugly!!!

      lines, w_m_p =
        read_indented_content(src, 4, [:definition], :definition)
      want_my_paragraph ||= w_m_p

      lines.unshift first

      # dbg_describe_ary(lines, 'DD')
      src2 = LineSource.new(lines, src, parent_offset)
      children = parse_blocks(src2)
      definitions << md_el(:definition_data, children)
    end

    return md_el(:definition, terms + definitions, {
      :terms => terms,
      :definitions => definitions,
      :want_my_paragraph => want_my_paragraph})
  end

end # BlockLevelParser
end # MaRuKu
end
end