instiki/lib/maruku/input/parse_block.rb

#--
#   Copyright (C) 2006  Andrea Censi  <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
# 
#   Maruku is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
# 
#   Maruku is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
# 
#   You should have received a copy of the GNU General Public License
#   along with Maruku; if not, write to the Free Software
#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
#++


module MaRuKu; module In; module Markdown; module BlockLevelParser

	include Helpers
	include MaRuKu::Strings
	include MaRuKu::In::Markdown::SpanLevelParser

	# Array of the block-level entries collected so far, with a helper
	# that summarizes its tail for diagnostic messages.
	class BlockContext < Array
		# Returns a string listing the +inspect+ form of (at most) the
		# last five entries, each on its own line.
		def describe
			shown = 5
			tail = size > shown ? last(shown) : self
			listing = tail.map { |entry| "\n -#{entry.inspect}" }
			"Last #{shown} elements: #{listing.join}"
		end
	end
	
	# Parses a whole document given as one string: the text is split
	# into lines (split_lines), wrapped in a LineSource and handed to
	# parse_blocks, whose result is returned.
	def parse_text_as_markdown(text)
		parse_blocks(LineSource.new(split_lines(text)))
	end
	
	# Parses the block-level structure read from +src+ (a LineSource)
	# and returns the resulting entries in a BlockContext.
	#
	# The main loop first offers the current line to the block
	# extensions, then dispatches on the line's md_type; every branch
	# consumes at least one line. After the loop:
	#
	# * nil entries (left by readers that gave up after reporting an
	#   error) are discarded;
	# * inline attribute lists are merged via merge_ial and removed;
	# * the :empty markers are removed;
	# * lists and definition lists in which no item asks for its own
	#   paragraph are flattened.
	def parse_blocks(src)
		output = BlockContext.new
		
		# run state machine
		while src.cur_line
			
			# Block extensions get the first look at the current line.
			next if check_block_extensions(src, output, src.cur_line)
			
			# Prints detected type (useful for debugging)
			# puts "#{src.cur_line.md_type}|#{src.cur_line}"
			case src.cur_line.md_type
				when :empty
					output.push :empty
					src.ignore_line
				when :ial
					m = InlineAttributeList.match src.shift_line
					content = m[1] || ""
					src2 = CharSource.new(content, src)
					interpret_extension(src2, output, [nil])
				when :ald
					output.push read_ald(src)
				when :text
					if src.cur_line =~ MightBeTableHeader &&
						src.next_line && src.next_line =~ TableSeparator
						output.push read_table(src)
					elsif [:header1,:header2].include? src.next_line.md_type
						output.push read_header12(src)
					elsif eventually_comes_a_def_list(src)
						definition = read_definition(src)
						# append to the current definition list if we have one
						if output.last.kind_of?(MDElement) &&
							output.last.node_type == :definition_list
							output.last.children << definition
						else
							output.push md_el(:definition_list, [definition])
						end
					else # Start of a paragraph
						output.push read_paragraph(src)
					end
				when :header2, :hrule
					# A :header2 line that does not follow a text line
					# is rendered as a horizontal rule.
					src.shift_line
					output.push md_hrule()
				when :header3
					output.push read_header3(src)
				when :ulist, :olist
					list_type = src.cur_line.md_type == :ulist ? :ul : :ol
					li = read_list_item(src)
					# append to current list if we have one
					if output.last.kind_of?(MDElement) &&
						output.last.node_type == list_type
						output.last.children << li
					else
						output.push md_el(list_type, [li])
					end
				when :quote
					output.push read_quote(src)
				when :code
					e = read_code(src)
					output << e if e
				when :raw_html
					e = read_raw_html(src)
					output << e if e
				when :footnote_text
					output.push read_footnote_text(src)
				when :ref_definition
					output.push read_ref_definition(src)
				when :abbreviation
					output.push read_abbreviation(src)
				when :xml_instr
					read_xml_instruction(src, output)
				when :metadata
					maruku_error "Please use the new meta-data syntax: \n"+
					"  http://maruku.rubyforge.org/proposal.html\n", src
					src.ignore_line
				else # warn if we forgot something
					md_type = src.cur_line.md_type
					line = src.cur_line
					maruku_error "Ignoring line '#{line}' type = #{md_type}", src
					src.shift_line
			end
		end

		# Readers that report an error and bail out (e.g. read_ald)
		# return nil; discard those so the passes below only see real
		# entries.
		output.compact!

		merge_ial(output, src, output)
		output.delete_if {|x| x.kind_of?(MDElement) &&
			x.node_type == :ial}
		
		# get rid of empty line markers
		output.delete_if {|x| x == :empty}
		# See for each list if we can omit the paragraphs and use li_span
		# TODO: do this after
		output.each do |c|
			# Only elements respond to node_type; anything else that was
			# pushed onto the output is left untouched.
			next unless c.kind_of?(MDElement)
			
			# Remove paragraphs that we can get rid of
			if [:ul,:ol].include? c.node_type
				if c.children.all? {|li| !li.want_my_paragraph}
					c.children.each do |d|
						d.node_type = :li_span
						d.children = d.children[0].children
					end
				end
			end
			if c.node_type == :definition_list
				if c.children.all? {|defi| !defi.want_my_paragraph}
					c.children.each do |definition|
						definition.definitions.each do |dd|
							dd.children = dd.children[0].children
						end
					end
				end
			end
		end
		
		output
	end
	
	
	# Reads one attribute definition list (ALD) line from +src+.
	#
	# When the line matches AttributeDefinitionList, the captured
	# attribute text is parsed with read_attribute_list, stored in
	# +self.ald+ under the captured id and returned wrapped by md_ald.
	# Otherwise an error is reported and nil is returned.
	def read_ald(src)
		line = src.shift_line
		unless line =~ AttributeDefinitionList
			maruku_error "Bug Bug:\n#{line.inspect}"
			return nil
		end
		
		id, raw_attributes = $1, $2
		attributes = read_attribute_list(CharSource.new(raw_attributes, src), nil, [nil])
		self.ald[id] = attributes
		md_ald(id, attributes)
	end
		
	# Reads a two-line header: a text line followed by an underline
	# line (the "-----" / "========" style). Both lines are consumed.
	#
	# When new_meta_data? is true, a trailing "{...}" on the text line
	# is split off and parsed as an attribute list. The level is 2 if
	# the underline has md_type :header2, otherwise 1.
	def read_header12(src)
		title = src.shift_line.strip
		attributes = nil
		if new_meta_data? && title =~ /^(.*)\{(.*)\}\s*$/
			title, raw_ial = $1.strip, $2
			attributes = read_attribute_list(CharSource.new(raw_ial, src), nil, [nil])
		end
		spans = parse_lines_as_span([title])
		underline = src.shift_line
		level = (underline.md_type == :header2) ? 2 : 1
		md_header(level, spans, attributes)
	end

	# Reads a one-line header of the form '#### header ####' and
	# consumes it.
	#
	# When new_meta_data? is true, a trailing "{...}" is split off and
	# parsed as an attribute list. The level is the number of leading
	# hashes (num_leading_hashes); the hashes are removed from the text
	# with strip_hashes.
	def read_header3(src)
		heading = src.shift_line.strip
		attributes = nil
		if new_meta_data? && heading =~ /^(.*)\{(.*)\}\s*$/
			heading, raw_ial = $1.strip, $2
			attributes = read_attribute_list(CharSource.new(raw_ial, src), nil, [nil])
		end
		depth = num_leading_hashes(heading)
		spans = parse_lines_as_span([strip_hashes(heading)])
		md_header(depth, spans, attributes)
	end

	# Reads an XML processing instruction ("<?target ... ?>"), possibly
	# spanning several lines, and appends the outcome to +output+.
	#
	# If the target is 'mrk' and MaRuKu::Globals[:unsafe_features] is
	# set, the code is passed to safe_execute_code and a non-nil result
	# is splatted onto +output+ (a String result raises "Not expected").
	# Otherwise an md_xml_instr element is pushed.
	#
	# Text after the closing "?>" on its line is reported as trailing
	# garbage. An instruction that is still open when the input ends is
	# reported as well and treated as if it had been closed there,
	# instead of failing while reading past the last line.
	#
	# Raises "BugBug" if the first line does not look like an
	# instruction at all.
	def read_xml_instruction(src, output)
		m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
		raise "BugBug" if not m
		target = m[2] || ''
		code = m[3]
		# Accumulate lines until the terminator shows up, but never read
		# past the end of the input.
		until code =~ /\?>/ || src.cur_line.nil?
			code += "\n"+src.shift_line
		end
		if code !~ /\?>/
			maruku_error "Unterminated XML instruction:\n"+
				add_tabs(code, 1, '|'), src
		elsif code !~ /\?>\s*$/
			garbage = (/\?>(.*)$/.match(code))[1]
			maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n"+
				add_tabs(code, 1, '|'), src
		end
		# Drop the terminator (and whitespace after it) from the code.
		code = code.gsub(/\?>\s*$/, '')
		
		if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
			result = safe_execute_code(self, code)
			if result
				if result.kind_of? String
					raise "Not expected"
				else
					output.push(*result)
				end
			end
		else
			output.push md_xml_instr(target, code)
		end
	end
	
	# Reads a block of raw HTML from +src+.
	#
	# Lines are fed to an HTMLHelper (joined by newlines) until it
	# reports is_finished? or the input runs out. An exception raised
	# while feeding is reported through maruku_error, together with its
	# backtrace; in either case whatever the helper has read so far is
	# returned wrapped by md_html.
	def read_raw_html(src)
		helper = HTMLHelper.new
		begin
			helper.eat_this(src.shift_line)
			while src.cur_line && !helper.is_finished?
				helper.eat_this("\n" + src.shift_line)
			end
		rescue Exception => e
			details = e.inspect + e.backtrace.join("\n")
			maruku_error "Bad block-level HTML:\n#{add_tabs(details,1,'|')}\n", src
		end
		md_html(helper.stuff_you_read)
	end
	
	# Collects the lines of one paragraph from +src+ and returns them,
	# parsed at span level, wrapped by md_par.
	#
	# The paragraph ends (without consuming the current line) when:
	# * the line is a quote, '#'-style header, empty line, raw HTML,
	#   reference definition, IAL or XML instruction;
	# * the line is a list item and the next line is a list item of
	#   the same kind (a lone list-like line stays in the paragraph);
	# * the line is blank once stripped;
	# * the next line is a header underline, i.e. the current line is a
	#   header title;
	# * a block extension matches the line.
	def read_paragraph(src)
		hard_breaks = [:quote, :header3, :empty, :raw_html, :ref_definition, :ial, :xml_instr]
		list_kinds = [:olist, :ulist]
		underlines = [:header1, :header2]
		
		collected = []
		while src.cur_line
			kind = src.cur_line.md_type
			break if hard_breaks.include?(kind)
			break if list_kinds.include?(kind) && src.next_line.md_type == kind
			break if src.cur_line.strip.size == 0
			break if underlines.include?(src.next_line.md_type)
			break if any_matching_block_extension?(src.cur_line)
			
			collected << src.shift_line
		end
		
		md_par(parse_lines_as_span(collected, src))
	end
	
	# Reads one list item (ordered or unordered) starting at the
	# current line of +src+.
	#
	# The marker line is consumed, the item's continuation lines are
	# gathered by read_indented_content, and the whole item is parsed
	# recursively with parse_blocks. The result is built with md_li; its
	# paragraph flag is set when read_indented_content asked for it or
	# when the item contains more than one block.
	def read_list_item(src)
		start_index = src.cur_index
		marker_type = src.cur_line.md_type
		head = src.shift_line
		
		# Width of the marker plus surrounding spaces on the first line
		indent = spaces_before_first_char(head)
		body, wants_par =
			read_indented_content(src, indent, [:ulist, :olist, :ial], marker_type)
		
		# Put the first line back in front, without its list marker
		body.unshift head[indent, head.size-1]
		
		blocks = parse_blocks(LineSource.new(body, src, start_index))
		md_li(blocks, wants_par || (blocks.size>1))
	end

	# Reads one abbreviation definition line from +src+.
	#
	# The two captures of the Abbreviation pattern are taken as the
	# abbreviation and its description. Problems (no match, or a
	# missing/empty abbreviation) are reported through maruku_error,
	# but the pair is still recorded in +self.abbreviations+ and
	# returned wrapped by md_abbr_def.
	def read_abbreviation(src)
		line = src.shift_line
		unless line =~ Abbreviation
			maruku_error "Bug: it's Andrea's fault. Tell him.\n#{line.inspect}"
		end
		
		abbr, desc = $1, $2
		
		if abbr.nil? || abbr.size == 0
			maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
		end
		
		self.abbreviations[abbr] = desc
		md_abbr_def(abbr, desc)
	end
	
	# Reads the text of one footnote starting at the current line of
	# +src+.
	#
	# The first line is matched against FootnoteText, whose captures
	# give the footnote id and the text that follows it on that line.
	# Continuation lines are gathered by read_indented_content with a
	# fixed indentation of 4, the whole body is parsed with
	# parse_blocks, and the md_footnote element is both stored in
	# +self.footnotes+ under the id and returned.
	def read_footnote_text(src)
		start_index = src.cur_index
		head = src.shift_line
		
		maruku_error "Bug (it's Andrea's fault)" unless head =~ FootnoteText
		id, text = $1, $2
		
		body, _wants_par =
			read_indented_content(src, 4, [:footnote_text], :footnote_text)
		
		# The rest of the first line belongs to the body, unless blank
		body.unshift(text) unless text.nil? || text.strip == ""
		
		blocks = parse_blocks(LineSource.new(body, src, start_index))
		footnote = md_footnote(id, blocks)
		self.footnotes[id] = footnote
		footnote
	end


	# Collects the continuation lines of an item whose first line has
	# already been consumed. It is used to read list items,
	# definitions and footnote text.
	#
	# src::         the line source to read from
	# indentation:: number of leading spaces removed from accepted
	#               lines, and required of a line that follows an
	#               empty line
	# break_list::  line types that end the item as long as no empty
	#               line has been seen
	# item_type::   line type of the item being read, used for the
	#               paragraph decision below
	#
	# Returns the pair [lines, want_my_paragraph]. want_my_paragraph is
	# true when some content was accepted after an empty line, or when
	# an empty line was seen and the line on which reading stopped has
	# type item_type.
	def read_indented_content(src, indentation, break_list, item_type)
		lines =[]
		# collect all indented lines
		saw_empty = false; saw_anything_after = false
		while src.cur_line 
			#puts "#{src.cur_line.md_type} #{src.cur_line.inspect}"
			# Empty lines are always collected; trailing ones are removed
			# at the end.
			if src.cur_line.md_type == :empty
				saw_empty = true
				lines << src.shift_line
				next
			end
		
			# after a white line
			if saw_empty
				# we expect things to be properly aligned
				if (ns=number_of_leading_spaces(src.cur_line)) < indentation
					#puts "breaking for spaces, only #{ns}: #{src.cur_line}"
					break
				end
				saw_anything_after = true
			else
				# No empty line yet: only the listed line types end the item.
				break if break_list.include? src.cur_line.md_type
#				break if src.cur_line.md_type != :text
			end
		

			stripped = strip_indent(src.shift_line, indentation)
			lines << stripped

			#puts "Accepted as #{stripped.inspect}"
		
			# You are only required to indent the first line of 
			# a child paragraph.
			# (so the following :text lines are taken without checking
			# their indentation)
			if stripped.md_type == :text
				while src.cur_line && (src.cur_line.md_type == :text)
					lines << strip_indent(src.shift_line, indentation)
				end
			end
		end

		want_my_paragraph = saw_anything_after || 
			(saw_empty && (src.cur_line  && (src.cur_line.md_type == item_type))) 
	
#		dbg_describe_ary(lines, 'LI')
		# create a new context 
	
		# Drop the empty lines collected at the end of the item.
		while lines.last && (lines.last.md_type == :empty)
			lines.pop
		end
		
		return lines, want_my_paragraph
	end

	
	# Reads a block quote: all consecutive lines of type :quote are
	# consumed, stripped of their quote marker with unquote, parsed
	# recursively with parse_blocks and wrapped by md_quote.
	def read_quote(src)
		start_index = src.cur_index
		
		quoted = []
		quoted << unquote(src.shift_line) while src.cur_line && src.cur_line.md_type == :quote
		
		inner = LineSource.new(quoted, src, start_index)
		md_quote(parse_blocks(inner))
	end

	# Reads an indented code block.
	#
	# All consecutive lines of type :code or :empty are consumed and
	# have up to 4 columns of indentation removed by strip_indent.
	# Blank lines at both ends are discarded. Returns nil when nothing
	# is left, otherwise the remaining lines joined by newlines and
	# wrapped by md_codeblock.
	def read_code(src)
		body = []
		while src.cur_line && [:code, :empty].include?(src.cur_line.md_type)
			body << strip_indent(src.shift_line, 4)
		end
		
		# Trim blank lines at the end, then at the start
		body.pop   while body.last  && body.last.strip.size == 0
		body.shift while body.first && body.first.strip.size == 0
		
		return nil if body.empty?
		
		md_codeblock(body.join("\n"))
	end

	# Reads a run of metadata lines, possibly separated by empty
	# lines, and returns the merged hash of what parse_metadata yields
	# for each of them. Reading stops at the first line of any other
	# type, which is left unconsumed.
	def read_metadata(src)
		collected = {}
		while (line = src.cur_line)
			kind = line.md_type
			if kind == :empty
				src.shift_line
			elsif kind == :metadata
				collected.merge!(parse_metadata(src.shift_line))
			else
				break
			end
		end
		collected
	end
	
		
	# Reads a link reference definition from +src+.
	#
	# If the following line is not itself a reference definition and is
	# indented by 1 to 3 spaces, it is treated as the continuation of an
	# incomplete definition and joined to the first line.
	#
	# The line is matched against LinkRegex: captures 1-3 give the id
	# (stripped and downcased), the url and the title; capture 4, if
	# present, holds extra key=value couples separated by whitespace,
	# which are stored alongside :url and :title in +self.refs[id]+.
	# A leading double quote on a value removes the value's first and
	# last character.
	#
	# Returns the md_ref_def element, or nil (after reporting the
	# problem through maruku_error) when the line does not match.
	def read_ref_definition(src)
		line = src.shift_line
		
		# if link is incomplete, shift next line
		if src.cur_line && (src.cur_line.md_type != :ref_definition) &&
			([1,2,3].include? number_of_leading_spaces(src.cur_line) )
			line += " "+ src.shift_line
		end
		
		match = LinkRegex.match(line)
		if not match
			# Nothing sensible can be built from this line; stop here
			# instead of dereferencing the missing match.
			maruku_error "Link does not respect format: '#{line}'", src
			return nil
		end
		
		id = match[1].strip.downcase
		url = match[2]
		title = match[3]
		
		hash = self.refs[id] = {:url=>url,:title=>title}
		
		stuff = match[4]
		
		if stuff
			stuff.split.each do |couple|
				# Split on the first '=' only, so that values may
				# themselves contain '='.
				k, v = couple.split('=', 2)
				# A couple without a key (e.g. a stray "=") is ignored.
				next if k.nil? || k.empty?
				v ||= ""
				if v[0,1]=='"' then v = v[1, v.size-2] end
				hash[k.to_sym] = v
			end
		end
		
		return md_ref_def(id, url, {:title=>title})
	end
	
	# Splits one table line on '|' and returns its non-blank cells,
	# each stripped of surrounding whitespace.
	def split_cells(s)
		s.strip.split('|').select{|x|x.strip.size>0}.map{|x|x.strip}
	end
	
	# Reads a table: a header line, a separator line and then every
	# following line that contains a '|'.
	#
	# The separator cells decide the column alignment through the Sep
	# pattern: both captures set means :center, only the second means
	# :right, anything else :left. The number of separator cells is the
	# number of columns.
	#
	# Returns an md_el(:table) whose children are the head cells
	# followed by the cells of all rows, flattened, with the alignment
	# list stored under :align. If the header or any row does not have
	# exactly that many cells, the problem is reported and md_br() is
	# returned instead; the lines read so far stay consumed.
	def read_table(src)
		head = split_cells(src.shift_line).map{|s| md_el(:head_cell, parse_lines_as_span([s])) }
		
		separator = split_cells(src.shift_line)
		
		align = separator.map do |s|
			m = Sep.match(s)
			if m && m[1] && m[2] then :center elsif m && m[2] then :right else :left end
		end
		
		num_columns = align.size
		
		if head.size != num_columns
			maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
			tell_user "I will ignore this table."
			# XXX try to recover
			return md_br()
		end
		
		rows = []
		
		while src.cur_line && src.cur_line =~ /\|/
			row = split_cells(src.shift_line).map{|s|
				md_el(:cell, parse_lines_as_span([s]))}
			# Check the row just read: since the children are flattened
			# below, a row of the wrong width would shift every cell
			# after it.
			if row.size != num_columns
				maruku_error  "Row does not have #{num_columns} columns: \n#{row.inspect}"
				tell_user "I will ignore this table."
				# XXX try to recover
				return md_br()
			end
			rows << row
		end
		
		children = (head+rows).flatten
		return md_el(:table, children, {:align => align})
	end
	
	# Tells whether a definition list starts at the current line.
	#
	# The string returned by src.tell_me_the_future is tested against
	# the pattern "one or more t, an optional e, then d" anchored at the
	# start of a line. Returns the match position (truthy) or nil.
	# NOTE(review): the letters presumably stand for text, empty and
	# definition lines - confirm against tell_me_the_future.
	def eventually_comes_a_def_list(src)
		src.tell_me_the_future =~ %r{^t+e?d}x
	end
	
		
	# Reads one entry of a definition list: one or more term lines, an
	# optional empty line, then one or more definitions.
	#
	# Each term line becomes an md_el(:definition_term). Each
	# definition's first line is reduced to capture 1 of the Definition
	# pattern, its continuation lines are gathered by
	# read_indented_content, and the body is parsed with parse_blocks
	# into an md_el(:definition_data).
	#
	# Returns an md_el(:definition) whose children are the terms
	# followed by the definitions, with :terms, :definitions and
	# :want_my_paragraph (true if an empty line separated the terms from
	# the definitions, or if any definition asked for it) as meta data.
	#
	# Raises "Chunky Bacon!" if the terms are not followed by a
	# definition line.
	def read_definition(src)
		# Read one or more terms
		terms = []
		while src.cur_line && src.cur_line.md_type == :text
			term_spans = parse_lines_as_span([src.shift_line])
			terms << md_el(:definition_term, term_spans)
		end
		
		raise "Chunky Bacon!" if not src.cur_line
		
		# one optional empty
		want_my_paragraph = false
		if src.cur_line.md_type == :empty
			want_my_paragraph = true
			src.shift_line
		end
		
		raise "Chunky Bacon!" if src.cur_line.md_type != :definition
		
		# Read one or more definitions
		definitions = []
		while src.cur_line && src.cur_line.md_type == :definition
			start_index = src.cur_index
			
			# Text of the definition's first line, without its marker
			head = src.shift_line[Definition, 1]
			
			body, wants_par =
				read_indented_content(src, 4, [:definition], :definition)
			want_my_paragraph ||= wants_par
			
			body.unshift head
			
			blocks = parse_blocks(LineSource.new(body, src, start_index))
			definitions << md_el(:definition_data, blocks)
		end
		
		meta = {
			:terms => terms,
			:definitions => definitions,
			:want_my_paragraph => want_my_paragraph}
		md_el(:definition, terms+definitions, meta)
	end
end # BlockLevelParser
end # MaRuKu
end
end
Bring up to current. 2007-01-22 15:36:51 +01:00			`#--`
			`# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>`
			`#`
			`# This file is part of Maruku.`
			`#`
			`# Maruku is free software; you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License as published by`
			`# the Free Software Foundation; either version 2 of the License, or`
			`# (at your option) any later version.`
			`#`
			`# Maruku is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU General Public License`
			`# along with Maruku; if not, write to the Free Software`
			`# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA`
			`#++`


			`module MaRuKu; module In; module Markdown; module BlockLevelParser`

			`include Helpers`
			`include MaRuKu::Strings`
			`include MaRuKu::In::Markdown::SpanLevelParser`

			`class BlockContext < Array`
			`def describe`
			`n = 5`
			`desc = size > n ? self[-n,n] : self`
			`"Last #{n} elements: "+`
			`desc.map{\|x\| "\n -" + x.inspect}.join`
			`end`
			`end`

			`# Splits the string and calls parse_lines_as_markdown`
			`def parse_text_as_markdown(text)`
			`lines = split_lines(text)`
			`src = LineSource.new(lines)`
			`return parse_blocks(src)`
			`end`

			`# Input is a LineSource`
			`def parse_blocks(src)`
			`output = BlockContext.new`

			`# run state machine`
			`while src.cur_line`

			`next if check_block_extensions(src, output, src.cur_line)`

			`# Prints detected type (useful for debugging)`
			`# puts "#{src.cur_line.md_type}\|#{src.cur_line}"`
			`case src.cur_line.md_type`
			`when :empty;`
			`output.push :empty`
			`src.ignore_line`
			`when :ial`
			`m = InlineAttributeList.match src.shift_line`
			`content = m[1] \|\| ""`
			`src2 = CharSource.new(content, src)`
			`interpret_extension(src2, output, [nil])`
			`when :ald`
			`output.push read_ald(src)`
			`when :text`
			`if src.cur_line =~ MightBeTableHeader and`
			`(src.next_line && src.next_line =~ TableSeparator)`
			`output.push read_table(src)`
			`elsif [:header1,:header2].include? src.next_line.md_type`
			`output.push read_header12(src)`
			`elsif eventually_comes_a_def_list(src)`
			`definition = read_definition(src)`
			`if output.last.kind_of?(MDElement) &&`
			`output.last.node_type == :definition_list then`
			`output.last.children << definition`
			`else`
			`output.push md_el(:definition_list, [definition])`
			`end`
			`else # Start of a paragraph`
			`output.push read_paragraph(src)`
			`end`
			`when :header2, :hrule`
			`# hrule`
			`src.shift_line`
			`output.push md_hrule()`
			`when :header3`
			`output.push read_header3(src)`
			`when :ulist, :olist`
			`list_type = src.cur_line.md_type == :ulist ? :ul : :ol`
			`li = read_list_item(src)`
			`# append to current list if we have one`
			`if output.last.kind_of?(MDElement) &&`
			`output.last.node_type == list_type then`
			`output.last.children << li`
			`else`
			`output.push md_el(list_type, [li])`
			`end`
			`when :quote; output.push read_quote(src)`
			`when :code; e = read_code(src); output << e if e`
			`when :raw_html; e = read_raw_html(src); output << e if e`

			`when :footnote_text; output.push read_footnote_text(src)`
			`when :ref_definition; output.push read_ref_definition(src)`
			`when :abbreviation; output.push read_abbreviation(src)`
			`when :xml_instr; read_xml_instruction(src, output)`
			`when :metadata;`
			`maruku_error "Please use the new meta-data syntax: \n"+`
			`" http://maruku.rubyforge.org/proposal.html\n", src`
			`src.ignore_line`
			`else # warn if we forgot something`
			`md_type = src.cur_line.md_type`
			`line = src.cur_line`
			`maruku_error "Ignoring line '#{line}' type = #{md_type}", src`
			`src.shift_line`
			`end`
			`end`

			`merge_ial(output, src, output)`
			`output.delete_if {\|x\| x.kind_of?(MDElement) &&`
			`x.node_type == :ial}`

			`# get rid of empty line markers`
			`output.delete_if {\|x\| x == :empty}`
			`# See for each list if we can omit the paragraphs and use li_span`
			`# TODO: do this after`
			`output.each do \|c\|`
			`# Remove paragraphs that we can get rid of`
			`if [:ul,:ol].include? c.node_type`
			`if c.children.all? {\|li\| !li.want_my_paragraph} then`
			`c.children.each do \|d\|`
			`d.node_type = :li_span`
			`d.children = d.children[0].children`
			`end`
			`end`
			`end`
			`if c.node_type == :definition_list`
			`if c.children.all?{\|defi\| !defi.want_my_paragraph} then`
			`c.children.each do \|definition\|`
			`definition.definitions.each do \|dd\|`
			`dd.children = dd.children[0].children`
			`end`
			`end`
			`end`
			`end`
			`end`

			`output`
			`end`



			`def read_ald(src)`
			`if (l=src.shift_line) =~ AttributeDefinitionList`
			`id = $1; al=$2;`
			`al = read_attribute_list(CharSource.new(al,src), context=nil, break_on=[nil])`
			`self.ald[id] = al;`
			`return md_ald(id, al)`
			`else`
			`maruku_error "Bug Bug:\n#{l.inspect}"`
			`return nil`
			`end`
			`end`

			`# reads a header (with ----- or ========)`
			`def read_header12(src)`
			`line = src.shift_line.strip`
			`al = nil`
			`# Check if there is an IAL`
			`if new_meta_data? and line =~ /^(.)\{(.)\}\s*$/`
			`line = $1.strip`
			`ial = $2`
			`al = read_attribute_list(CharSource.new(ial,src), context=nil, break_on=[nil])`
			`end`
			`text = parse_lines_as_span [ line ]`
			`level = src.cur_line.md_type == :header2 ? 2 : 1;`
			`src.shift_line`
			`return md_header(level, text, al)`
			`end`

			`# reads a header like '#### header ####'`
			`def read_header3(src)`
			`line = src.shift_line.strip`
			`al = nil`
			`# Check if there is an IAL`
			`if new_meta_data? and line =~ /^(.)\{(.)\}\s*$/`
			`line = $1.strip`
			`ial = $2`
			`al = read_attribute_list(CharSource.new(ial,src), context=nil, break_on=[nil])`
			`end`
			`level = num_leading_hashes(line)`
			`text = parse_lines_as_span [strip_hashes(line)]`
			`return md_header(level, text, al)`
			`end`

			`def read_xml_instruction(src, output)`
			`m = /^\s<\?((\w+)\s)?(.*)$/.match src.shift_line`
			`raise "BugBug" if not m`
			`target = m[2] \|\| ''`
			`code = m[3]`
			`until code =~ /\?>/`
			`code += "\n"+src.shift_line`
			`end`
			`if not code =~ (/\?>\s*$/)`
			`garbage = (/\?>(.*)$/.match(code))[1]`
			`maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n"+`
			`add_tabs(code, 1, '\|'), src`
			`end`
			`code.gsub!(/\?>\s*$/, '')`

			`if target == 'mrk' && MaRuKu::Globals[:unsafe_features]`
			`result = safe_execute_code(self, code)`
			`if result`
			`if result.kind_of? String`
			`raise "Not expected"`
			`else`
			`output.push *result`
			`end`
			`end`
			`else`
			`output.push md_xml_instr(target, code)`
			`end`
			`end`

			`def read_raw_html(src)`
			`h = HTMLHelper.new`
			`begin`
			`h.eat_this(l=src.shift_line)`
			`# puts "\nBLOCK:\nhtml -> #{l.inspect}"`
			`while src.cur_line and not h.is_finished?`
			`l=src.shift_line`
			`# puts "html -> #{l.inspect}"`
			`h.eat_this "\n"+l`
			`end`
			`rescue Exception => e`
			`ex = e.inspect + e.backtrace.join("\n")`
			`maruku_error "Bad block-level HTML:\n#{add_tabs(ex,1,'\|')}\n", src`
			`end`
			`raw_html = h.stuff_you_read`
			`return md_html(raw_html)`
			`end`

			`def read_paragraph(src)`
			`lines = []`
			`while src.cur_line`
			`# :olist does not break`
			`case t = src.cur_line.md_type`
			`when :quote,:header3,:empty,:raw_html,:ref_definition,:ial,:xml_instr`
			`break`
			`when :olist,:ulist`
			`break if src.next_line.md_type == t`
			`end`
			`break if src.cur_line.strip.size == 0`
			`break if [:header1,:header2].include? src.next_line.md_type`
			`break if any_matching_block_extension?(src.cur_line)`

			`lines << src.shift_line`
			`end`
			`# dbg_describe_ary(lines, 'PAR')`
			`children = parse_lines_as_span(lines, src)`

			`return md_par(children)`
			`end`

			`# Reads one list item, either ordered or unordered.`
			`def read_list_item(src)`
			`parent_offset = src.cur_index`

			`item_type = src.cur_line.md_type`
			`first = src.shift_line`

			# Ugly things going on inside `read_indented_content`
			`indentation = spaces_before_first_char(first)`
			`break_list = [:ulist, :olist, :ial]`
			`lines, want_my_paragraph =`
			`read_indented_content(src,indentation, break_list, item_type)`

			`# add first line`
			`# Strip first '*', '-', '+' from first line`
			`stripped = first[indentation, first.size-1]`
			`lines.unshift stripped`

			`#dbg_describe_ary(lines, 'LIST ITEM ')`

			`src2 = LineSource.new(lines, src, parent_offset)`
			`children = parse_blocks(src2)`
			`with_par = want_my_paragraph \|\| (children.size>1)`

			`return md_li(children, with_par)`
			`end`

			`def read_abbreviation(src)`
			`if not (l=src.shift_line) =~ Abbreviation`
			`maruku_error "Bug: it's Andrea's fault. Tell him.\n#{l.inspect}"`
			`end`

			`abbr = $1`
			`desc = $2`

			`if (not abbr) or (abbr.size==0)`
			`maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"`
			`end`

			`self.abbreviations[abbr] = desc`

			`return md_abbr_def(abbr, desc)`
			`end`

			`def read_footnote_text(src)`
			`parent_offset = src.cur_index`

			`first = src.shift_line`

			`if not first =~ FootnoteText`
			`maruku_error "Bug (it's Andrea's fault)"`
			`end`

			`id = $1`
			`text = $2`

			# Ugly things going on inside `read_indented_content`
			`indentation = 4 #first.size-text.size`

			`# puts "id =_#{id}_; text=_#{text}_ indent=#{indentation}"`

			`break_list = [:footnote_text]`
			`item_type = :footnote_text`
			`lines, want_my_paragraph =`
			`read_indented_content(src,indentation, break_list, item_type)`

			`# add first line`
			`if text && text.strip != "" then lines.unshift text end`

			`# dbg_describe_ary(lines, 'FOOTNOTE')`
			`src2 = LineSource.new(lines, src, parent_offset)`
			`children = parse_blocks(src2)`

			`e = md_footnote(id, children)`
			`self.footnotes[id] = e`
			`return e`
			`end`


			`# This is the only ugly function in the code base.`
			`# It is used to read list items, descriptions, footnote text`
			`def read_indented_content(src, indentation, break_list, item_type)`
			`lines =[]`
			`# collect all indented lines`
			`saw_empty = false; saw_anything_after = false`
			`while src.cur_line`
			`#puts "#{src.cur_line.md_type} #{src.cur_line.inspect}"`
			`if src.cur_line.md_type == :empty`
			`saw_empty = true`
			`lines << src.shift_line`
			`next`
			`end`

			`# after a white line`
			`if saw_empty`
			`# we expect things to be properly aligned`
			`if (ns=number_of_leading_spaces(src.cur_line)) < indentation`
			`#puts "breaking for spaces, only #{ns}: #{src.cur_line}"`
			`break`
			`end`
			`saw_anything_after = true`
			`else`
			`break if break_list.include? src.cur_line.md_type`
			`# break if src.cur_line.md_type != :text`
			`end`


			`stripped = strip_indent(src.shift_line, indentation)`
			`lines << stripped`

			`#puts "Accepted as #{stripped.inspect}"`

			`# You are only required to indent the first line of`
			`# a child paragraph.`
			`if stripped.md_type == :text`
			`while src.cur_line && (src.cur_line.md_type == :text)`
			`lines << strip_indent(src.shift_line, indentation)`
			`end`
			`end`
			`end`

			`want_my_paragraph = saw_anything_after \|\|`
			`(saw_empty && (src.cur_line && (src.cur_line.md_type == item_type)))`

			`# dbg_describe_ary(lines, 'LI')`
			`# create a new context`

			`while lines.last && (lines.last.md_type == :empty)`
			`lines.pop`
			`end`

			`return lines, want_my_paragraph`
			`end`


			`def read_quote(src)`
			`parent_offset = src.cur_index`

			`lines = []`
			`# collect all indented lines`
			`while src.cur_line && src.cur_line.md_type == :quote`
			`lines << unquote(src.shift_line)`
			`end`
			`# dbg_describe_ary(lines, 'QUOTE')`

			`src2 = LineSource.new(lines, src, parent_offset)`
			`children = parse_blocks(src2)`
			`return md_quote(children)`
			`end`

			`def read_code(src)`
			`# collect all indented lines`
			`lines = []`
			`while src.cur_line && ([:code, :empty].include? src.cur_line.md_type)`
			`lines << strip_indent(src.shift_line, 4)`
			`end`

			`#while lines.last && (lines.last.md_type == :empty )`
			`while lines.last && lines.last.strip.size == 0`
			`lines.pop`
			`end`

			`while lines.first && lines.first.strip.size == 0`
			`lines.shift`
			`end`

			`return nil if lines.empty?`

			`source = lines.join("\n")`

			`# dbg_describe_ary(lines, 'CODE')`

			`return md_codeblock(source)`
			`end`

			`# Reads a series of metadata lines with empty lines in between`
			`def read_metadata(src)`
			`hash = {}`
			`while src.cur_line`
			`case src.cur_line.md_type`
			`when :empty; src.shift_line`
			`when :metadata; hash.merge! parse_metadata(src.shift_line)`
			`else break`
			`end`
			`end`
			`hash`
			`end`


			`def read_ref_definition(src)`
			`line = src.shift_line`

			`# if link is incomplete, shift next line`
			`if src.cur_line && (src.cur_line.md_type != :ref_definition) &&`
			`([1,2,3].include? number_of_leading_spaces(src.cur_line) )`
			`line += " "+ src.shift_line`
			`end`

			`# puts "total= #{line}"`

			`match = LinkRegex.match(line)`
			`if not match`
			`error "Link does not respect format: '#{line}'"`
			`end`

			`id = match[1]; url = match[2]; title = match[3];`
			`id = id.strip.downcase`

			`hash = self.refs[id] = {:url=>url,:title=>title}`

			`stuff=match[4]`

			`if stuff`
			`stuff.split.each do \|couple\|`
			`# puts "found #{couple}"`
			`k, v = couple.split('=')`
			`v \|\|= ""`
			`if v[0,1]=='"' then v = v[1, v.size-2] end`
			`# puts "key:_#{k}_ value=_#{v}_"`
			`hash[k.to_sym] = v`
			`end`
			`end`
			`# puts hash.inspect`

			`return md_ref_def(id, url, meta={:title=>title})`
			`end`

			`def read_table(src)`

			`def split_cells(s)`
			`s.strip.split('\|').select{\|x\|x.strip.size>0}.map{\|x\|x.strip}`
			`end`

			`head = split_cells(src.shift_line).map{\|s\| md_el(:head_cell, parse_lines_as_span([s])) }`

			`separator=split_cells(src.shift_line)`

			`align = separator.map { \|s\| s =~ Sep`
			`if $1 and $2 then :center elsif $2 then :right else :left end }`

			`num_columns = align.size`

			`if head.size != num_columns`
			`maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"`
			`tell_user "I will ignore this table."`
			`# XXX try to recover`
			`return md_br()`
			`end`

			`rows = []`

			`while src.cur_line && src.cur_line =~ /\\|/`
			`row = split_cells(src.shift_line).map{\|s\|`
			`md_el(:cell, parse_lines_as_span([s]))}`
			`if head.size != num_columns`
			`maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}"`
			`tell_user "I will ignore this table."`
			`# XXX try to recover`
			`return md_br()`
			`end`
			`rows << row`
			`end`

			`children = (head+rows).flatten`
			`return md_el(:table, children, {:align => align})`
			`end`

			`# If current line is text, a definition list is coming`
			`# if 1) text,empty,[text,empty]*,definition`

			`def eventually_comes_a_def_list(src)`
			`future = src.tell_me_the_future`
			`ok = future =~ %r{^t+e?d}x`
			`# puts "future: #{future} - #{ok}"`
			`ok`
			`end`


			`def read_definition(src)`
			`# Read one or more terms`
			`terms = []`
			`while src.cur_line && src.cur_line.md_type == :text`
			`terms << md_el(:definition_term, parse_lines_as_span([src.shift_line]))`
			`end`
			`# dbg_describe_ary(terms, 'DT')`

			`want_my_paragraph = false`

			`raise "Chunky Bacon!" if not src.cur_line`

			`# one optional empty`
			`if src.cur_line.md_type == :empty`
			`want_my_paragraph = true`
			`src.shift_line`
			`end`

			`raise "Chunky Bacon!" if src.cur_line.md_type != :definition`

			`# Read one or more definitions`
			`definitions = []`
			`while src.cur_line && src.cur_line.md_type == :definition`
			`parent_offset = src.cur_index`

			`first = src.shift_line`
			`first =~ Definition`
			`first = $1`

			`# I know, it's ugly!!!`

			`lines, w_m_p =`
			`read_indented_content(src,4, [:definition], :definition)`
			`want_my_paragraph \|\|= w_m_p`

			`lines.unshift first`

			`# dbg_describe_ary(lines, 'DD')`
			`src2 = LineSource.new(lines, src, parent_offset)`
			`children = parse_blocks(src2)`
			`definitions << md_el(:definition_data, children)`
			`end`

			`return md_el(:definition, terms+definitions, {`
			`:terms => terms,`
			`:definitions => definitions,`
			`:want_my_paragraph => want_my_paragraph})`
			`end`
			`end # BlockLevelParser`
			`end # MaRuKu`
			`end`
			`end`