Bring up to current.

2007-01-22 08:36:51 -06:00 · 2007-01-22 08:36:51 -06:00 · b19e1e4f47
commit b19e1e4f47
parent 69b62b6f33
71 changed files with 8305 additions and 39 deletions
--- a/lib/maruku/input/charsource.rb
+++ b/lib/maruku/input/charsource.rb
@ -0,0 +1,325 @@
+#--
+#   Copyright (C) 2006  Andrea Censi  <andrea (at) rubyforge.org>
+#
+# This file is part of Maruku.
+# 
+#   Maruku is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+# 
+#   Maruku is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+# 
+#   You should have received a copy of the GNU General Public License
+#   along with Maruku; if not, write to the Free Software
+#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#++
+
+
+module MaRuKu; module In; module Markdown; module SpanLevelParser
+
+# Forward declarations: the three scanner classes are declared empty
+# here so that the CharSource constant can be assigned before the
+# class bodies are defined further down in this file.
+
+# a hand-written string scanner
+class CharSourceManual; end
+
+# a wrapper around StringScanner
+class CharSourceStrscan; end
+
+# A debug scanner that checks the correctness of both
+# by comparing their output
+class CharSourceDebug; end
+
+# Choose the implementation that the name CharSource refers to.
+
+CharSource = CharSourceManual     # faster! 58ms vs. 65ms
+#CharSource = CharSourceStrscan
+#CharSource = CharSourceDebug
+
+
+class CharSourceManual
+	include MaRuKu::Strings
+	
+	def initialize(s, parent=nil)
+		raise "Passed #{s.class}" if not s.kind_of? String
+		@buffer = s
+		@buffer_index = 0
+		@parent = parent
+	end
+	
+	# Return current char as a FixNum (or nil).
+	def cur_char; @buffer[@buffer_index]   end
+
+	# Return the next n chars as a String.
+	def cur_chars(n); @buffer[@buffer_index,n]  end
+	
+	# Return the char after current char as a FixNum (or nil).
+	def next_char; @buffer[@buffer_index+1] end
+	
+	def shift_char
+		c = @buffer[@buffer_index]
+		@buffer_index+=1
+		c
+	end
+	
+	def ignore_char
+		@buffer_index+=1
+		nil
+	end
+	
+	def ignore_chars(n)
+		@buffer_index+=n
+		nil
+	end
+	
+	def current_remaining_buffer
+		@buffer[@buffer_index, @buffer.size-@buffer_index]
+	end
+	
+	def cur_chars_are(string)
+		# There is a bug here
+		if false
+			r2 = /^.{#{@buffer_index}}#{Regexp.escape string}/m
+			@buffer =~ r2
+		else
+			cur_chars(string.size) == string
+		end
+	end
+
+	def next_matches(r)
+		r2 = /^.{#{@buffer_index}}#{r}/m
+		md = r2.match @buffer
+		return !!md
+	end
+	
+	def read_regexp3(r)
+		r2 = /^.{#{@buffer_index}}#{r}/m
+		m = r2.match @buffer
+		if m
+			consumed = m.to_s.size - @buffer_index
+#			puts "Consumed #{consumed} chars (entire is #{m.to_s.inspect})"
+			ignore_chars consumed
+		else
+#			puts "Could not read regexp #{r2.inspect} from buffer "+
+#			" index=#{@buffer_index}"
+#			puts "Cur chars = #{cur_chars(20).inspect}"
+#			puts "Matches? = #{cur_chars(20) =~ r}"
+		end
+		m
+	end
+
+		def read_regexp(r)
+			r2 = /^#{r}/
+			rest = current_remaining_buffer
+			m = r2.match(rest)
+			if m
+				@buffer_index += m.to_s.size
+#				puts "#{r} matched #{rest.inspect}: #{m.to_s.inspect}"
+			end
+			return m
+		end
+	
+	def consume_whitespace
+		while c = cur_char 
+			if (c == 32 || c == ?\t)
+#				puts "ignoring #{c}"
+				ignore_char
+			else
+#				puts "#{c} is not ws: "<<c
+				break
+			end
+		end
+	end
+
+	def read_text_chars(out)
+		s = @buffer.size; c=nil
+		while @buffer_index < s && (c=@buffer[@buffer_index]) &&
+			 ((c>=?a && c<=?z) || (c>=?A && c<=?Z))
+				out << c
+				@buffer_index += 1
+		end
+	end
+	
+	def describe
+		s = describe_pos(@buffer, @buffer_index)
+		if @parent
+			s += "\n\n" + @parent.describe
+		end
+		s
+	end
+	include SpanLevelParser
+end
+
+def describe_pos(buffer, buffer_index)
+	len = 75
+	num_before = [len/2, buffer_index].min
+	num_after = [len/2, buffer.size-buffer_index].min
+	num_before_max = buffer_index
+	num_after_max = buffer.size-buffer_index
+	
+#		puts "num #{num_before} #{num_after}"
+	num_before = [num_before_max, len-num_after].min
+	num_after  = [num_after_max, len-num_before].min
+#		puts "num #{num_before} #{num_after}"
+	
+	index_start = [buffer_index - num_before, 0].max
+	index_end   = [buffer_index + num_after, buffer.size].min
+	
+	size = index_end- index_start
+	
+#		puts "- #{index_start} #{size}"
+
+	str = buffer[index_start, size]
+	str.gsub!("\n",'N')
+	str.gsub!("\t",'T')
+	
+	if index_end == buffer.size 
+		str += "EOF"
+	end
+		
+	pre_s = buffer_index-index_start
+	pre_s = [pre_s, 0].max
+	pre_s2 = [len-pre_s,0].max
+#		puts "pre_S = #{pre_s}"
+	pre =" "*(pre_s) 
+	
+	"-"*len+"\n"+
+	str + "\n" +
+	"-"*pre_s + "|" + "-"*(pre_s2)+"\n"+
+#		pre + "|\n"+
+	pre + "+--- Byte #{buffer_index}\n"+
+	
+	"Shown bytes [#{index_start} to #{size}] of #{buffer.size}:\n"+
+	add_tabs(buffer,1,">")
+	
+#		"CharSource: At character #{@buffer_index} of block "+
+#		" beginning with:\n    #{@buffer[0,50].inspect} ...\n"+
+#		" before: \n     ... #{cur_chars(50).inspect} ... "
+end
+
+
+require 'strscan'
+
+class CharSourceStrscan
+	include SpanLevelParser
+	include MaRuKu::Strings
+	
+	def initialize(s)
+		@s = StringScanner.new(s)
+	end
+	
+	# Return current char as a FixNum (or nil).
+	def cur_char
+		 @s.peek(1)[0]
+	end
+
+	# Return the next n chars as a String.
+	def cur_chars(n); 
+		@s.peek(n)
+	end
+	
+	# Return the char after current char as a FixNum (or nil).
+	def next_char; 
+		@s.peek(2)[1]
+	end
+	
+	def shift_char
+		(@s.get_byte)[0]
+	end
+	
+	def ignore_char
+		@s.get_byte
+		nil
+	end
+	
+	def ignore_chars(n)
+		n.times do @s.get_byte end
+		nil
+	end
+	
+	def current_remaining_buffer
+		@s.rest #nil #@buffer[@buffer_index, @buffer.size-@buffer_index]
+	end
+	
+	def cur_chars_are(string)
+		cur_chars(string.size) == string
+	end
+
+	def next_matches(r)
+		len = @s.match?(r)
+		return !!len
+	end
+	
+	def read_regexp(r)
+		string = @s.scan(r)
+		if string
+			return r.match(string)
+		else
+			return nil
+		end
+	end
+	
+	def consume_whitespace
+		@s.scan /\s+/
+		nil
+	end
+	
+	def describe
+		describe_pos(@s.string, @s.pos)
+	end
+	
+end
+
+
+class CharSourceDebug
+	def initialize(s)
+		@a = CharSourceManual.new(s)
+		@b = CharSourceStrscan.new(s)
+	end
+	
+	def method_missing(methodname, *args)
+		a_bef = @a.describe
+		b_bef = @b.describe
+		
+		a = @a.send(methodname, *args)
+		b = @b.send(methodname, *args)
+		
+#		if methodname == :describe
+#			return a
+#		end
+		
+		if a.kind_of? MatchData
+			if a.to_a != b.to_a
+				puts "called: #{methodname}(#{args})"
+				puts "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}"
+				puts "AFTER: "+@a.describe
+				puts "AFTER: "+@b.describe
+				puts "BEFORE: "+a_bef
+				puts "BEFORE: "+b_bef
+				puts caller.join("\n")
+				exit
+			end
+		else
+			if a!=b
+				puts "called: #{methodname}(#{args})"
+				puts "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}"
+				puts ""+@a.describe
+				puts ""+@b.describe
+				puts caller.join("\n")
+				exit
+			end
+		end
+		
+		if @a.cur_char != @b.cur_char
+			puts "Fuori sincronia dopo #{methodname}(#{args})"
+			puts ""+@a.describe
+			puts ""+@b.describe
+			exit
+		end
+		
+		return a
+	end
+end
+
+end end end end
--- a/lib/maruku/input/extensions.rb
+++ b/lib/maruku/input/extensions.rb
@ -0,0 +1,68 @@
+module MaRuKu; module In; module Markdown
+
+
+	# Hash Fixnum -> name
+	SpanExtensionsTrigger = {}
+	
+	
+	class SpanExtension
+		# trigging chars
+		attr_accessor :chars
+		# trigging regexp
+		attr_accessor :regexp
+		# lambda
+		attr_accessor :block
+	end
+	
+	# Hash String -> Extension
+	SpanExtensions = {}
+
+	def check_span_extensions(src, con)
+		c = src.cur_char
+		if extensions = SpanExtensionsTrigger[c]
+			extensions.each do |e|
+				if e.regexp && (match = src.next_matches(e.regexp))
+					return true if e.block.call(doc, src, con)
+				end
+			end
+		end
+		return false # not special
+	end
+	
+	def self.register_span_extension(args, &block)
+		e = SpanExtension.new
+		e.chars = [*args[:chars]]
+		e.regexp = args[:regexp]
+		e.block = block
+		e.chars.each do |c|
+			(SpanExtensionsTrigger[c] ||= []).push e
+		end
+	end
+
+	def self.register_block_extension(args, &block)
+		regexp = args[:regexp]
+		BlockExtensions[regexp] = block
+	end
+
+	# Hash Regexp -> Block
+	BlockExtensions = {}
+
+	def check_block_extensions(src, con, line)
+		BlockExtensions.each do |reg, block|
+			if m = reg.match(line)
+				block = BlockExtensions[reg]
+				return true if block.call(doc, src, con)
+			end
+		end
+		return false # not special
+	end
+	
+	def any_matching_block_extension?(line)
+		BlockExtensions.each_key do |reg|
+			m = reg.match(line)
+			return m if m
+		end
+		return false
+	end
+	
+end end end
--- a/lib/maruku/input/html_helper.rb
+++ b/lib/maruku/input/html_helper.rb
@ -0,0 +1,144 @@
+#--
+#   Copyright (C) 2006  Andrea Censi  <andrea (at) rubyforge.org>
+#
+# This file is part of Maruku.
+# 
+#   Maruku is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+# 
+#   Maruku is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+# 
+#   You should have received a copy of the GNU General Public License
+#   along with Maruku; if not, write to the Free Software
+#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#++
+
+
+module MaRuKu; module In; module Markdown; module SpanLevelParser
+
+# This class helps me read and sanitize HTML blocks
+
+# I tried to do this with REXML, but wasn't able to. (suggestions?)
+
+	class HTMLHelper
+		include MaRuKu::Strings
+		
+		Tag = %r{^<(/)?(\w+)\s*([^>]*)>}m
+		EverythingElse = %r{^[^<]+}m
+		CommentStart = %r{^<!--}x
+		CommentEnd = %r{^.*-->}
+		TO_SANITIZE = ['img','hr'] 
+		
+#		attr_accessor :inside_comment
+		attr_reader :rest
+		
+		def initialize 
+			@rest = ""
+			@tag_stack = []
+			@m = nil
+			@already = ""
+			@inside_comment = false
+		end
+		
+		def eat_this(line)
+			@rest = line  + @rest
+			things_read = 0
+			until @rest.empty?
+				if @inside_comment
+					if @m = CommentEnd.match(@rest)
+						@inside_comment = false
+						@already += @m.pre_match + @m.to_s
+						@rest = @m.post_match
+					elsif @m = EverythingElse.match(@rest)
+						@already += @m.pre_match + @m.to_s
+						@rest = @m.post_match
+					end
+				else
+					if @m = CommentStart.match(@rest)
+						things_read += 1
+						@inside_comment = true
+						@already += @m.pre_match + @m.to_s
+						@rest = @m.post_match
+					elsif @m = Tag.match(@rest)
+						things_read += 1
+						@already += @m.pre_match
+						@rest = @m.post_match
+					
+						is_closing = !!@m[1]
+						tag = @m[2]
+						attributes = @m[3]
+						
+						is_single = false
+						if attributes =~ /\A(.*)\/\Z/
+							attributes = $1
+							is_single = true
+						end
+					
+						if TO_SANITIZE.include? tag 
+							attributes.strip!
+					#		puts "Attributes: #{attributes.inspect}"
+							if attributes.size > 0
+								@already +=  '<%s %s />' % [tag, attributes]
+							else
+								@already +=  '<%s />' % [tag]
+							end
+						elsif is_closing
+							@already += @m.to_s
+							if @tag_stack.empty?
+								error "Malformed: closing tag #{tag.inspect} "+
+								      "in empty list"
+							end 
+							if @tag_stack.last != tag
+								error "Malformed: tag <#{tag}> "+
+								      "closes <#{@tag_stack.last}>"
+							end
+							@tag_stack.pop
+						elsif not is_single
+							@tag_stack.push tag
+							@already += @m.to_s
+						end
+					elsif @m = EverythingElse.match(@rest)
+						@already += @m.pre_match + @m.to_s
+						@rest = @m.post_match
+					else
+						error "Malformed HTML: not complete: #{@rest.inspect}"
+					end
+				end # not inside comment
+				
+#				puts inspect
+#				puts "Read: #{@tag_stack.inspect}"
+				break if is_finished? and things_read>0	
+			end
+		end
+
+
+		def error(s)
+			raise Exception, "Error: #{s} \n"+ inspect, caller
+		end
+
+		def inspect; "HTML READER\n comment=#{@inside_comment} "+
+			"match=#{@m.to_s.inspect}\n"+
+			"Tag stack = #{@tag_stack.inspect} \n"+
+			"Before:\n"+
+			add_tabs(@already,1,'|')+"\n"+
+			"After:\n"+
+			add_tabs(@rest,1,'|')+"\n"
+			
+		end
+		
+		
+		def stuff_you_read
+			@already
+		end
+		
+		def is_finished?
+			not @inside_comment and @tag_stack.empty?
+		end
+	end # html helper 
+
+end end end end
--- a/lib/maruku/input/linesource.rb
+++ b/lib/maruku/input/linesource.rb
@ -0,0 +1,111 @@
+#--
+#   Copyright (C) 2006  Andrea Censi  <andrea (at) rubyforge.org>
+#
+# This file is part of Maruku.
+# 
+#   Maruku is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+# 
+#   Maruku is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+# 
+#   You should have received a copy of the GNU General Public License
+#   along with Maruku; if not, write to the Free Software
+#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#++
+
+
+module MaRuKu; module In; module Markdown; module BlockLevelParser
+
+# This represents a source of lines that can be consumed.
+#
+# It is the twin of CharSource.
+#
+	
+class LineSource
+	include MaRuKu::Strings
+	
+	def initialize(lines, parent=nil, parent_offset=nil)
+		raise "NIL lines? " if not lines
+		@lines = lines
+		@lines_index = 0
+		@parent = parent
+		@parent_offset = parent_offset
+	end
+	
+	def cur_line()  @lines[@lines_index] end
+	def next_line() @lines[@lines_index+1] end
+		
+	def shift_line() 
+		raise "Over the rainbow" if @lines_index >= @lines.size 
+		l = @lines[@lines_index]
+		@lines_index += 1
+		return l
+	end
+	
+	def ignore_line
+		raise "Over the rainbow" if @lines_index >= @lines.size 
+		@lines_index += 1
+	end
+	
+	def describe
+		#s = "At line ##{@lines_index} of #{@lines.size}:\n"
+		s = "At line #{original_line_number(@lines_index)}\n"
+		
+		context = 3 # lines
+		from = [@lines_index-context, 0].max
+		to   = [@lines_index+context, @lines.size-1].min
+		
+		for i in from..to
+			prefix = (i == @lines_index) ? '--> ' : '    ';
+			l = @lines[i]
+			s += "%10s %4s|#{l}" %
+				[@lines[i].md_type.to_s, prefix]
+				
+			s += "|\n"
+		end
+		
+#		if @parent 
+#			s << "Parent context is: \n"
+#			s << add_tabs(@parent.describe,1,'|')
+#		end
+		s
+	end
+	
+	def original_line_number(index)
+		if @parent
+			return index + @parent.original_line_number(@parent_offset)
+		else
+			1 + index
+		end
+	end
+	
+	def cur_index
+		@lines_index
+	end
+	
+	# Returns the type of next line as a string
+	# breaks at first :definition
+	def tell_me_the_future
+		s = ""; num_e = 0;
+		for i in @lines_index..@lines.size-1
+			c = case @lines[i].md_type
+				when :text; "t"
+				when :empty; num_e+=1; "e"
+				when :definition; "d"
+				else "o"
+			end
+			s += c
+			break if c == "d" or num_e>1
+		end
+		s	
+	end
+	
+end # linesource
+
+end end end end # block
+
--- a/lib/maruku/input/parse_block.rb
+++ b/lib/maruku/input/parse_block.rb
@ -0,0 +1,594 @@
+#--
+#   Copyright (C) 2006  Andrea Censi  <andrea (at) rubyforge.org>
+#
+# This file is part of Maruku.
+# 
+#   Maruku is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+# 
+#   Maruku is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+# 
+#   You should have received a copy of the GNU General Public License
+#   along with Maruku; if not, write to the Free Software
+#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#++
+
+
+module MaRuKu; module In; module Markdown; module BlockLevelParser
+
+	include Helpers
+	include MaRuKu::Strings
+	include MaRuKu::In::Markdown::SpanLevelParser
+
+	class BlockContext < Array
+		def describe
+			n = 5
+			desc = size > n ? self[-n,n] : self
+			"Last #{n} elements: "+
+			desc.map{|x| "\n -" + x.inspect}.join
+		end
+	end
+	
+	# Splits the string and calls parse_lines_as_markdown
+	def parse_text_as_markdown(text)
+		lines =  split_lines(text)
+		src = LineSource.new(lines)
+		return parse_blocks(src)
+	end
+	
+	# Parses the lines of src (a LineSource) into block-level elements.
+	#
+	# The first part is a loop that dispatches on the md_type of the
+	# current line, after giving registered block extensions a chance
+	# to handle it. The second part post-processes the result:
+	# merge_ial is called, :ial elements and :empty markers are removed,
+	# and lists / definition lists whose items do not want a paragraph
+	# get the children of their first child in place of their own.
+	#
+	# Returns a BlockContext.
+	def parse_blocks(src)
+		output = BlockContext.new
+		
+		# run state machine
+		while src.cur_line
+			
+			# a block extension that returns true has handled the line
+			next if check_block_extensions(src, output, src.cur_line)
+			
+#  Prints detected type (useful for debugging)
+#			puts "#{src.cur_line.md_type}|#{src.cur_line}"
+			case src.cur_line.md_type
+				when :empty; 
+					# empty lines are kept as markers; removed below
+					output.push :empty
+					src.ignore_line
+				when :ial
+					m =  InlineAttributeList.match src.shift_line
+					content = m[1] ||  "" 
+					src2 = CharSource.new(content, src)
+					interpret_extension(src2, output, [nil])
+				when :ald
+					output.push read_ald(src)
+				when :text
+					# a text line may start a table, a header underlined
+					# on the next line, a definition list or a paragraph
+					if src.cur_line =~ MightBeTableHeader and 
+						(src.next_line && src.next_line =~ TableSeparator)
+						output.push read_table(src)
+					elsif [:header1,:header2].include? src.next_line.md_type
+						output.push read_header12(src)
+					elsif eventually_comes_a_def_list(src)
+					 	definition = read_definition(src)
+						# append to the current definition list, if any
+						if output.last.kind_of?(MDElement) && 
+							output.last.node_type == :definition_list then
+							output.last.children << definition
+						else
+							output.push md_el(:definition_list, [definition])
+						end
+					else # Start of a paragraph
+						output.push read_paragraph(src)
+					end
+				when :header2, :hrule
+					# hrule
+					# (a :header2 line reaching this branch was not consumed
+					# as the underline of a header: it is treated as a rule)
+					src.shift_line
+					output.push md_hrule()
+				when :header3
+					output.push read_header3(src)
+				when :ulist, :olist
+					list_type = src.cur_line.md_type == :ulist ? :ul : :ol
+					li = read_list_item(src)
+					# append to current list if we have one
+					if output.last.kind_of?(MDElement) && 
+						output.last.node_type == list_type then
+						output.last.children << li
+					else
+						output.push md_el(list_type, [li])
+					end
+				when :quote;    output.push read_quote(src)
+				when :code;     e = read_code(src); output << e if e
+				when :raw_html; e = read_raw_html(src); output << e if e
+
+				when :footnote_text;   output.push read_footnote_text(src)
+				when :ref_definition;  output.push read_ref_definition(src)
+				when :abbreviation;    output.push read_abbreviation(src)
+				when :xml_instr;       read_xml_instruction(src, output)
+				when :metadata;        
+					maruku_error "Please use the new meta-data syntax: \n"+
+					"  http://maruku.rubyforge.org/proposal.html\n", src
+					src.ignore_line
+				else # warn if we forgot something
+					md_type = src.cur_line.md_type
+					line = src.cur_line
+					maruku_error "Ignoring line '#{line}' type = #{md_type}", src
+					src.shift_line
+			end
+		end
+
+		# merge_ial is called first; the :ial elements are then dropped
+		merge_ial(output, src, output)
+		output.delete_if {|x| x.kind_of?(MDElement) &&
+			x.node_type == :ial}
+		
+		# get rid of empty line markers
+		output.delete_if {|x| x == :empty}
+		# See for each list if we can omit the paragraphs and use li_span
+		# TODO: do this after
+		output.each do |c| 
+			# Remove paragraphs that we can get rid of
+			if [:ul,:ol].include? c.node_type 
+				if c.children.all? {|li| !li.want_my_paragraph} then
+					c.children.each do |d|
+						d.node_type = :li_span
+						# replace the item's children with those of its
+						# first child
+						d.children = d.children[0].children 
+					end
+				end
+			end 
+			if c.node_type == :definition_list
+				if c.children.all?{|defi| !defi.want_my_paragraph} then
+					c.children.each do |definition| 
+						definition.definitions.each do |dd|
+							# same replacement, for each definition data
+							dd.children = dd.children[0].children 
+						end
+					end
+				end
+			end 
+		end
+		
+		output
+	end
+	
+	
+	
+	def read_ald(src)
+		if (l=src.shift_line) =~ AttributeDefinitionList
+			id = $1;   al=$2;
+			al = read_attribute_list(CharSource.new(al,src), context=nil, break_on=[nil])
+			self.ald[id] = al;
+			return md_ald(id, al)
+		else
+			maruku_error "Bug Bug:\n#{l.inspect}"
+			return nil
+		end
+	end
+		
+	# reads a header (with ----- or ========)
+	def read_header12(src)
+		line = src.shift_line.strip
+		al = nil
+		# Check if there is an IAL
+		if new_meta_data? and line =~ /^(.*)\{(.*)\}\s*$/
+			line = $1.strip
+			ial = $2
+			al  = read_attribute_list(CharSource.new(ial,src), context=nil, break_on=[nil])
+		end
+		text = parse_lines_as_span [ line ]
+		level = src.cur_line.md_type == :header2 ? 2 : 1;  
+		src.shift_line
+		return md_header(level, text, al)
+	end
+
+	# reads a header like '#### header ####'	
+	def read_header3(src)
+		line = src.shift_line.strip
+		al = nil
+		# Check if there is an IAL
+		if new_meta_data? and line =~ /^(.*)\{(.*)\}\s*$/
+			line = $1.strip
+			ial = $2
+			al  = read_attribute_list(CharSource.new(ial,src), context=nil, break_on=[nil])
+		end
+		level = num_leading_hashes(line)
+		text = parse_lines_as_span [strip_hashes(line)] 
+		return md_header(level, text, al)
+	end
+
+	def read_xml_instruction(src, output)
+		m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
+		raise "BugBug" if not m
+		target = m[2] || ''
+		code = m[3]
+		until code =~ /\?>/
+			code += "\n"+src.shift_line
+		end
+		if not code =~ (/\?>\s*$/)
+			garbage = (/\?>(.*)$/.match(code))[1]
+			maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n"+
+				add_tabs(code, 1, '|'), src
+		end
+		code.gsub!(/\?>\s*$/, '')
+		
+		if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
+			result = safe_execute_code(self, code)	
+			if result
+				if result.kind_of? String
+					raise "Not expected"
+				else
+					output.push *result
+				end
+			end
+		else
+			output.push md_xml_instr(target, code)
+		end
+	end
+	
+	def read_raw_html(src)
+		h = HTMLHelper.new
+		begin 
+			h.eat_this(l=src.shift_line)
+#			puts "\nBLOCK:\nhtml -> #{l.inspect}"
+			while src.cur_line and not h.is_finished? 
+				l=src.shift_line
+#				puts "html -> #{l.inspect}"
+				h.eat_this "\n"+l
+			end
+		rescue Exception => e
+			ex = e.inspect + e.backtrace.join("\n")
+			maruku_error "Bad block-level HTML:\n#{add_tabs(ex,1,'|')}\n", src
+		end
+		raw_html = h.stuff_you_read
+		return md_html(raw_html)
+	end
+	
+	# Reads the lines of one paragraph, starting from the current line,
+	# and returns the element built by md_par from the parsed spans.
+	#
+	# Lines are collected until one of the conditions below says that
+	# the paragraph is over. The line that stops the paragraph is not
+	# consumed.
+	def read_paragraph(src)
+		lines = []
+		while src.cur_line 
+			# These types always end the paragraph. A list line ends it
+			# only when the following line has the same list type.
+			case t = src.cur_line.md_type
+				when :quote,:header3,:empty,:raw_html,:ref_definition,:ial,:xml_instr
+					break
+				when :olist,:ulist
+					break if src.next_line.md_type == t
+			end
+			# a line with only whitespace also ends the paragraph
+			break if src.cur_line.strip.size == 0			
+			# the next line underlines this one: it is a header, not ours
+			break if [:header1,:header2].include? src.next_line.md_type
+			break if any_matching_block_extension?(src.cur_line)
+			
+			lines << src.shift_line
+		end
+#		dbg_describe_ary(lines, 'PAR')
+		children = parse_lines_as_span(lines, src)
+
+		return md_par(children)
+	end
+	
+	# Reads one list item, either ordered or unordered.
+	def read_list_item(src)
+		parent_offset = src.cur_index
+		
+		item_type = src.cur_line.md_type
+		first = src.shift_line
+
+		# Ugly things going on inside `read_indented_content`
+		indentation = spaces_before_first_char(first)
+		break_list = [:ulist, :olist, :ial]
+		lines, want_my_paragraph = 
+			read_indented_content(src,indentation, break_list, item_type)
+
+		# add first line
+			# Strip first '*', '-', '+' from first line
+			stripped = first[indentation, first.size-1]
+		lines.unshift stripped
+		
+		#dbg_describe_ary(lines, 'LIST ITEM ')
+
+		src2 = LineSource.new(lines, src, parent_offset)
+		children = parse_blocks(src2)
+		with_par = want_my_paragraph || (children.size>1)
+		
+		return md_li(children, with_par)
+	end
+
+	def read_abbreviation(src)
+		if not (l=src.shift_line) =~ Abbreviation
+			maruku_error "Bug: it's Andrea's fault. Tell him.\n#{l.inspect}"
+		end
+		
+		abbr = $1
+		desc = $2
+		
+		if (not abbr) or (abbr.size==0)
+			maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
+		end
+		
+		self.abbreviations[abbr] = desc
+		
+		return md_abbr_def(abbr, desc)
+	end
+	
+	def read_footnote_text(src)
+		parent_offset = src.cur_index
+			
+		first = src.shift_line
+		
+		if not first =~ FootnoteText 
+			maruku_error "Bug (it's Andrea's fault)"
+		end
+		
+		id = $1
+		text = $2
+
+		# Ugly things going on inside `read_indented_content`
+		indentation = 4 #first.size-text.size
+		
+#		puts "id =_#{id}_; text=_#{text}_ indent=#{indentation}"
+		
+		break_list = [:footnote_text]
+		item_type = :footnote_text
+		lines, want_my_paragraph = 
+			read_indented_content(src,indentation, break_list, item_type)
+
+		# add first line
+		if text && text.strip != "" then lines.unshift text end
+		
+#		dbg_describe_ary(lines, 'FOOTNOTE')
+		src2 = LineSource.new(lines, src, parent_offset)
+		children = parse_blocks(src2)
+		
+		e = md_footnote(id, children)
+		self.footnotes[id] = e
+		return e
+	end
+
+
+	# This is the only ugly function in the code base.
+	# It is used to read list items, descriptions, footnote text
+	#
+	# Collects from src the lines that belong to an indented item.
+	#
+	# indentation:: number of leading spaces required on lines that
+	#               come after an empty line; also the amount passed
+	#               to strip_indent for every collected line
+	# break_list::  line types that end the item, as long as no empty
+	#               line has been seen yet
+	# item_type::   type of the item being read (used to compute
+	#               want_my_paragraph)
+	#
+	# Returns [lines, want_my_paragraph]. Trailing empty lines are
+	# removed from lines.
+	def read_indented_content(src, indentation, break_list, item_type)
+		lines =[]
+		# collect all indented lines
+		saw_empty = false; saw_anything_after = false
+		while src.cur_line 
+			#puts "#{src.cur_line.md_type} #{src.cur_line.inspect}"
+			# empty lines are collected, and remembered
+			if src.cur_line.md_type == :empty
+				saw_empty = true
+				lines << src.shift_line
+				next
+			end
+		
+			# after a white line
+			if saw_empty
+				# we expect things to be properly aligned
+				if (ns=number_of_leading_spaces(src.cur_line)) < indentation
+					#puts "breaking for spaces, only #{ns}: #{src.cur_line}"
+					break
+				end
+				saw_anything_after = true
+			else
+				# before any white line, only the type can end the item
+				break if break_list.include? src.cur_line.md_type
+#				break if src.cur_line.md_type != :text
+			end
+		
+
+			stripped = strip_indent(src.shift_line, indentation)
+			lines << stripped
+
+			#puts "Accepted as #{stripped.inspect}"
+		
+			# You are only required to indent the first line of 
+			# a child paragraph.
+			if stripped.md_type == :text
+				while src.cur_line && (src.cur_line.md_type == :text)
+					lines << strip_indent(src.shift_line, indentation)
+				end
+			end
+		end
+
+		# The item wants a paragraph if content followed an empty line,
+		# or if an empty line separates it from a next item of its type.
+		want_my_paragraph = saw_anything_after || 
+			(saw_empty && (src.cur_line  && (src.cur_line.md_type == item_type))) 
+	
+#		dbg_describe_ary(lines, 'LI')
+		# create a new context 
+	
+		# drop the empty lines collected at the end
+		while lines.last && (lines.last.md_type == :empty)
+			lines.pop
+		end
+		
+		return lines, want_my_paragraph
+	end
+
+	
+	def read_quote(src)
+		parent_offset = src.cur_index
+			
+		lines = []
+		# collect all indented lines
+		while src.cur_line && src.cur_line.md_type == :quote
+			lines << unquote(src.shift_line)
+		end
+#		dbg_describe_ary(lines, 'QUOTE')
+
+		src2 = LineSource.new(lines, src, parent_offset)
+		children = parse_blocks(src2)
+		return md_quote(children)
+	end
+
+	def read_code(src)
+		# collect all indented lines
+		lines = []
+		while src.cur_line && ([:code, :empty].include? src.cur_line.md_type)
+			lines << strip_indent(src.shift_line, 4)
+		end
+		
+		#while lines.last && (lines.last.md_type == :empty )
+		while lines.last && lines.last.strip.size == 0
+			lines.pop 
+		end
+
+		while lines.first && lines.first.strip.size == 0
+			lines.shift 
+		end
+		
+		return nil if lines.empty?
+
+		source = lines.join("\n")
+		
+#		dbg_describe_ary(lines, 'CODE')
+
+		return md_codeblock(source)
+	end
+
+	# Reads a series of metadata lines with empty lines in between
+	def read_metadata(src)
+		hash = {}
+		while src.cur_line 
+			case src.cur_line.md_type
+				when :empty;  src.shift_line
+				when :metadata; hash.merge! parse_metadata(src.shift_line)
+				else break
+			end
+		end
+		hash
+	end
+	
+		
+	# Reads a link reference definition and stores it in self.refs,
+	# under the stripped, downcased id.
+	#
+	# The definition may continue on the next line, if that line is not
+	# itself a reference definition and starts with 1 to 3 spaces.
+	# Extra key=value couples found after the title are stored, with
+	# symbol keys, in the same hash as :url and :title.
+	#
+	# Returns the element built by md_ref_def.
+	def read_ref_definition(src)
+		definition = src.shift_line
+		
+		# if link is incomplete, shift next line
+		following = src.cur_line
+		if following && (following.md_type != :ref_definition) && 
+			[1,2,3].include?(number_of_leading_spaces(following))
+			definition += " "+ src.shift_line
+		end
+		
+		match = LinkRegex.match(definition)
+		# NOTE(review): `error` is not defined in the code visible here
+		# (the rest of this module reports through maruku_error), and if
+		# it returns, the next line fails on nil -- TODO confirm.
+		if not match
+			error "Link does not respect format: '#{definition}'"
+		end
+		
+		id    = match[1].strip.downcase
+		url   = match[2]
+		title = match[3]
+		
+		ref = self.refs[id] = {:url=>url,:title=>title}
+		
+		extra = match[4]
+		if extra
+			extra.split.each do |couple|
+				key, value = couple.split('=')
+				value ||= ""
+				# remove the quotes around a quoted value
+				value = value[1, value.size-2] if value[0,1]=='"'
+				ref[key.to_sym] = value
+			end
+		end
+		
+		md_ref_def(id, url, {:title=>title})
+	end
+	
+	def read_table(src)
+		
+		def split_cells(s)
+			s.strip.split('|').select{|x|x.strip.size>0}.map{|x|x.strip}
+		end
+		
+		head = split_cells(src.shift_line).map{|s| md_el(:head_cell, parse_lines_as_span([s])) }
+			
+		separator=split_cells(src.shift_line)
+
+		align = separator.map { |s|  s =~ Sep
+			if $1 and $2 then :center elsif $2 then :right else :left end }
+				
+		num_columns = align.size
+		
+		if head.size != num_columns
+			maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
+			tell_user "I will ignore this table."
+			# XXX try to recover
+			return md_br()
+		end
+				
+		rows = []
+		
+		while src.cur_line && src.cur_line =~ /\|/
+			row = split_cells(src.shift_line).map{|s|
+				md_el(:cell, parse_lines_as_span([s]))}
+			if head.size != num_columns
+				maruku_error  "Row does not have #{num_columns} columns: \n#{row.inspect}"
+				tell_user "I will ignore this table."
+				# XXX try to recover
+				return md_br()
+			end
+			rows << row
+		end
+
+		children = (head+rows).flatten
+		return md_el(:table, children, {:align => align})
+	end
+	
+	# If current line is text, a definition list is coming
+	# if 1) text,empty,[text,empty]*,definition
+	
+	def eventually_comes_a_def_list(src)
+		future = src.tell_me_the_future
+		ok = future =~ %r{^t+e?d}x
+#		puts "future: #{future} - #{ok}"
+		ok
+	end
+	
+		
+	def read_definition(src)
+		# Read one or more terms
+		terms = []
+		while  src.cur_line &&  src.cur_line.md_type == :text
+			terms << md_el(:definition_term, parse_lines_as_span([src.shift_line]))
+		end
+#		dbg_describe_ary(terms, 'DT')
+
+		want_my_paragraph = false
+
+		raise "Chunky Bacon!" if not src.cur_line
+
+		# one optional empty
+		if src.cur_line.md_type == :empty
+			want_my_paragraph = true
+			src.shift_line
+		end
+		
+		raise "Chunky Bacon!" if src.cur_line.md_type != :definition
+		
+		# Read one or more definitions
+		definitions = []
+		while src.cur_line && src.cur_line.md_type == :definition
+			parent_offset = src.cur_index
+				
+			first = src.shift_line
+			first =~ Definition
+			first = $1
+			
+			# I know, it's ugly!!!
+
+			lines, w_m_p = 
+				read_indented_content(src,4, [:definition], :definition)
+			want_my_paragraph ||= w_m_p
+		
+			lines.unshift first
+			
+#			dbg_describe_ary(lines, 'DD')
+			src2 = LineSource.new(lines, src, parent_offset)
+			children = parse_blocks(src2)
+			definitions << md_el(:definition_data, children)
+		end
+		
+		return md_el(:definition, terms+definitions, { 	
+			:terms => terms, 
+			:definitions => definitions, 
+			:want_my_paragraph => want_my_paragraph})
+	end
+end # BlockLevelParser
+end # MaRuKu
+end
+end
--- a/lib/maruku/input/parse_doc.rb
+++ b/lib/maruku/input/parse_doc.rb
@ -0,0 +1,225 @@
+#--
+#   Copyright (C) 2006  Andrea Censi  <andrea (at) rubyforge.org>
+#
+# This file is part of Maruku.
+# 
+#   Maruku is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+# 
+#   Maruku is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+# 
+#   You should have received a copy of the GNU General Public License
+#   along with Maruku; if not, write to the Free Software
+#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#++
+
+
+require 'iconv'
+
+
+module MaRuKu; module In; module Markdown; module BlockLevelParser
+		
+	# Parses the whole document text +s+ and fills this document object.
+	#
+	# Steps, in order:
+	# 1. parse_email_headers splits +s+ into meta-data and body (:data);
+	#    the meta-data is merged into the document attributes.
+	# 2. If an :encoding attribute other than utf-8 is given, the body is
+	#    converted to UTF-8 with Iconv.
+	# 3. The body is parsed as Markdown into @children.
+	# 4. Abbreviations are searched and Markdown inside raw HTML is expanded.
+	# 5. The table of contents is created and stored in self.toc; the
+	#    document title defaults to the TOC's header element.
+	# 6. Default and explicit attribute lists are expanded on every element.
+	# 7. If Maruku::Globals[:unsafe_features] is set, embedded code is run.
+	def parse_doc(s)
+		
+		meta2 =  parse_email_headers(s)
+		# :data holds the document body; everything else is meta-data
+		data = meta2[:data]
+		meta2.delete :data
+		
+		self.attributes.merge! meta2
+		
+=begin maruku_doc
+Attribute: encoding
+Scope:     document
+Summary:   Encoding for the document.
+
+If the `encoding` attribute is specified, then the content
+will be converted from the specified encoding to UTF-8.
+
+Conversion happens using the `iconv` library.
+=end
+
+		# the attribute is consumed here and does not stay on the document
+		enc = self.attributes[:encoding]
+		self.attributes.delete :encoding
+		if enc && enc.downcase != 'utf-8'
+			converted = Iconv.new('utf-8', enc).iconv(data)
+			
+#			puts "Data: #{data.inspect}: #{data}"
+#			puts "Conv: #{converted.inspect}: #{converted}"
+			
+			data = converted
+		end
+		
+		@children = parse_text_as_markdown(data)
+		
+		# always enabled for now; the intended switch is markdown_extra?
+		if true #markdown_extra? 
+			self.search_abbreviations
+			self.substitute_markdown_inside_raw_html
+		end
+		
+		toc = create_toc
+
+		# use title if not set
+		if not self.attributes[:title] and toc.header_element
+			title = toc.header_element.to_s
+			self.attributes[:title]  = title
+#			puts "Set document title to #{title}"
+		end
+		
+		# save for later use
+		self.toc = toc
+		
+		# Now do the attributes magic
+		each_element do |e|
+			# default attribute list
+			# (looked up in the attribute list definitions by node type name)
+			if default = self.ald[e.node_type.to_s]
+				expand_attribute_list(default, e.attributes)
+			end
+			# the element's own attribute list is applied after the default one
+			expand_attribute_list(e.al, e.attributes)
+#			puts "#{e.node_type}: #{e.attributes.inspect}"
+		end
+	
+=begin maruku_doc
+Attribute: unsafe_features
+Scope:     global
+Summary:   Enables execution of XML instructions.
+
+Disabled by default because of security concerns.
+=end
+
+		if Maruku::Globals[:unsafe_features]
+			self.execute_code_blocks
+			# TODO: remove executed code blocks
+		end
+	end
+	
+	# Expands an attribute list in an Hash
+	def expand_attribute_list(al, result)
+		al.each do |k, v|
+			case k
+			when :class
+				if not result[:class]
+					result[:class] = v
+				else
+					result[:class] += " " + v
+				end
+			when :id; result[:id] = v
+			when :ref; 
+				if self.ald[v]
+					already = (result[:expanded_references] ||= [])
+					if not already.include?(v)
+						already.push v
+						expand_attribute_list(self.ald[v], result)
+					else
+						already.push  v
+						maruku_error "Circular reference between labels.\n\n"+
+						"Label #{v.inspect} calls itself via recursion.\nThe recursion is "+
+							(already.map{|x| x.inspect}.join(' => ')) 
+					end
+				else
+					if not result[:unresolved_references]
+						result[:unresolved_references] = v
+					else
+						result[:unresolved_references] << " #{v}"
+					end
+					
+					result[v.to_sym] = true
+				end
+			else
+				result[k.to_sym]=v
+			end
+		end
+	end
+
+	# Evaluates +code+ (a String of Ruby source) with +object+ as receiver,
+	# using instance_eval, and returns the value of the evaluation.
+	#
+	# Errors raised by the evaluated code are reported through maruku_error
+	# together with the code and the backtrace; nil is returned afterwards.
+	# Syntax errors are ScriptErrors, not StandardErrors, hence both
+	# families are rescued. Signals and exit requests are deliberately
+	# not intercepted.
+	def safe_execute_code(object, code)
+		begin
+			return object.instance_eval(code)
+		rescue StandardError, ScriptError => e
+			# The stack lives in Exception#backtrace; Kernel#caller is private
+			# and calling it on the exception raised NoMethodError.
+			trace = (e.backtrace || []).join("\n")
+			maruku_error "Exception while executing this:\n"+
+				add_tabs(code, 1, ">")+
+				"\nThe error was:\n"+
+				add_tabs(e.inspect+"\n"+trace, 1, "|")
+		end
+		nil
+	end
+	
+	def execute_code_blocks
+		self.each_element(:xml_instr) do |e|
+			if e.target == 'maruku'
+				result = safe_execute_code(e, e.code)
+				if result.kind_of?(String)
+					puts "Result is : #{result.inspect}"
+				end
+			end
+		end
+	end
+	
+	def search_abbreviations
+		self.abbreviations.each do |abbrev, title|
+			reg = Regexp.new(Regexp.escape(abbrev))
+			self.replace_each_string do |s|
+				if m = reg.match(s)
+					e = md_abbr(abbrev.dup, title ? title.dup : nil)
+					[m.pre_match, e, m.post_match]
+				else
+					s
+				end
+			end
+		end
+	end
+	
+	include REXML
+	# (PHP Markdown extra) Search for elements that have
+	# markdown=1 or markdown=block defined
+	# For every :raw_html element whose HTML was parsed successfully
+	# (@parsed_html is set), finds the tags carrying a 'markdown' attribute,
+	# removes that attribute and replaces text nodes with the HTML produced
+	# by parsing their (stripped) text as Markdown.
+	#
+	# The text is parsed at block level when the attribute value is 'block'
+	# or the tag is listed in block_tags; otherwise at span level.
+	def substitute_markdown_inside_raw_html
+		self.each_element(:raw_html) do |e|
+			doc = e.instance_variable_get :@parsed_html
+			if doc # valid html
+				# parse block-level markdown elements in these HTML tags
+				block_tags = ['div']
+
+				# use xpath to find elements with 'markdown' attribute
+				# NOTE(review): this block parameter reuses the name +e+ of the
+				# enclosing block; the outer element is not used below.
+				XPath.match(doc, "//*[attribute::markdown]" ).each do |e|
+#					puts "Found #{e}"
+					# should we parse block-level or span-level?
+					# (a local flag; it hides the parse_blocks method in this scope)
+					parse_blocks = (e.attributes['markdown'] == 'block') || 
+					               block_tags.include?(e.name)
+					# remove 'markdown' attribute
+					e.delete_attribute 'markdown'
+					# Select all text elements of e
+					# NOTE(review): "//text()" is an absolute XPath; it may select
+					# every text node of the document rather than only those
+					# below +e+ (".//text()") -- confirm against REXML.
+					XPath.match(e, "//text()" ).each { |original_text| 
+						s = original_text.value.strip
+						if s.size > 0
+							# :dummy element used only as a container to render HTML
+							el = md_el(:dummy,
+							 	parse_blocks ? parse_text_as_markdown(s) :
+							                  parse_lines_as_span([s]) )
+							p = original_text.parent
+							# insert the rendered nodes before the text node,
+							# then drop the text node itself
+							el.children_to_html.each do |x|
+								p.insert_before(original_text, x)
+							end
+							p.delete(original_text)
+							
+						end
+					}
+						
+				end
+				
+			end
+		end
+	end
+	
+end end end end
--- a/lib/maruku/input/parse_span_better.rb
+++ b/lib/maruku/input/parse_span_better.rb
@ -0,0 +1,692 @@
+#--
+#   Copyright (C) 2006  Andrea Censi  <andrea (at) rubyforge.org>
+#
+# This file is part of Maruku.
+# 
+#   Maruku is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+# 
+#   Maruku is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+# 
+#   You should have received a copy of the GNU General Public License
+#   along with Maruku; if not, write to the Free Software
+#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#++
+
+
+require 'set'
+
+module MaRuKu; module In; module Markdown; module SpanLevelParser
+	include MaRuKu::Helpers
+	
+	EscapedCharInText = 
+		Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>]
+
+	EscapedCharInQuotes = 
+		Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>,?',?"]
+	
+	EscapedCharInInlineCode = [?\\,?`]
+
+	def parse_lines_as_span(lines, parent=nil)
+		parse_span_better lines.join("\n"), parent
+	end
+
+	def parse_span_better(string, parent=nil)
+		if not string.kind_of? String then 
+			error "Passed #{string.class}." end
+
+		st = (string + "")
+		st.freeze
+		src = CharSource.new(st, parent)
+		read_span(src, EscapedCharInText, [nil])
+	end
+		
+	# This is the main loop for reading span elements
+	#
+	# It's long, but not *complex* or difficult to understand.
+	#
+	#
+	# Reads span-level elements from +src+ until an exit condition is met
+	# and returns the list of elements after "education" (see +educate+).
+	#
+	# src::             character source positioned at the start of the span
+	# escaped::         characters that are taken literally when preceded
+	#                   by a backslash
+	# exit_on_chars::   characters that end the span (nil in the list means
+	#                   "end of input"); may itself be nil
+	# exit_on_strings:: strings that end the span; may be nil
+	#
+	# The terminating character/string is NOT consumed. Reaching the end
+	# of input without an exit condition is reported via maruku_error.
+	def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
+		con = SpanContext.new
+		c = d = nil
+		while true
+			c = src.cur_char
+
+			# This is only an optimization which cuts 50% of the time used.
+			# (but you can't use a-zA-z in exit_on_chars)
+			if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
+				con.cur_string << src.shift_char
+				next
+			end
+
+			break if exit_on_chars && exit_on_chars.include?(c)
+			break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}
+			
+			# check if there are extensions
+			if check_span_extensions(src, con)
+				next
+			end
+			
+			# re-read the current char before dispatching on it
+			case c = src.cur_char	
+			when ?\ # it's space (32)
+				# two spaces before a newline are a hard line break
+				if src.cur_chars_are "  \n"
+					src.ignore_chars(3)
+					con.push_element  md_br()
+					next
+				else
+					src.ignore_char
+					con.push_space 
+				end
+			when ?\n, ?\t 
+				# other whitespace collapses to a single space
+				src.ignore_char
+				con.push_space 
+			when ?`
+				read_inline_code(src,con)
+			when ?<
+				# It could be:
+				# 1) HTML "<div ..."
+				# 2) HTML "<!-- ..."
+				# 3) url "<http:// ", "<ftp:// ..."
+				# 4) email "<andrea@... ", "<mailto:andrea@..."
+				# 5) on itself! "a < b	"
+				# 6) Start of <<guillemettes>>
+				
+				case d = src.next_char
+					when ?<;  # guillemettes
+						src.ignore_chars(2)
+						con.push_char ?<
+						con.push_char ?<
+					when ?!; 
+						if src.cur_chars_are '<!--'
+							read_inline_html(src, con)
+						else 
+							con.push_char src.shift_char
+						end
+					when ?? 
+						read_xml_instr_span(src, con) 
+					when ?\ , ?\t 
+						con.push_char src.shift_char
+					else
+						if src.next_matches(/<mailto:/) or
+						   src.next_matches(/<[\w\.]+\@/)
+							read_email_el(src, con)
+						elsif src.next_matches(/<\w+:/)
+							read_url_el(src, con)
+						elsif src.next_matches(/<\w/)
+							#puts "This is HTML: #{src.cur_chars(20)}"
+							read_inline_html(src, con)
+						else 
+							#puts "This is NOT HTML: #{src.cur_chars(20)}"
+							con.push_char src.shift_char
+						end
+				end
+			when ?\\
+				# backslash escapes: quotes become entities, other escapable
+				# chars are pushed literally, anything else keeps the backslash
+				d = src.next_char
+				if d == ?'
+					src.ignore_chars(2)
+					con.push_element md_entity('apos')
+				elsif d == ?"
+					src.ignore_chars(2)
+					con.push_element md_entity('quot')
+				elsif escaped.include? d
+					src.ignore_chars(2)
+					con.push_char d
+				else
+					con.push_char src.shift_char
+				end
+			when ?[
+				if markdown_extra? && src.next_char == ?^
+					read_footnote_ref(src,con)
+				else
+					read_link(src, con)
+				end
+			when ?!
+				if src.next_char == ?[
+					read_image(src, con)
+				else
+					con.push_char src.shift_char
+				end
+			when ?&
+				# "&name;" is kept as an entity, a lone "&" is plain text
+				if m = src.read_regexp(/\&([\w\d]+);/)
+					con.push_element md_entity(m[1])
+				else
+					con.push_char src.shift_char
+				end
+			when ?*
+				if not src.next_char
+					maruku_error "Opening * as last char.", src, con
+					maruku_recover "Threating as literal"
+					con.push_char src.shift_char
+				else
+					# emphasis only opens when the delimiter run is followed
+					# by something that is neither whitespace nor the delimiter
+					follows = src.cur_chars(4)
+					if follows =~ /^\*\*\*[^\s\*]/
+						con.push_element read_emstrong(src,'***')
+					elsif follows  =~ /^\*\*[^\s\*]/
+						con.push_element read_strong(src,'**')
+					elsif follows =~ /^\*[^\s\*]/
+						con.push_element read_em(src,'*')
+					else # * is just a normal char
+						con.push_char src.shift_char
+					end
+				end
+			when ?_
+				if not src.next_char
+					maruku_error "Opening _ as last char", src, con
+					maruku_recover "Threating as literal", src, con
+					con.push_char src.shift_char
+				else
+					follows = src.cur_chars(4)
+					if  follows =~ /^\_\_\_[^\s\_]/
+						con.push_element read_emstrong(src,'___')
+					elsif follows  =~ /^\_\_[^\s\_]/
+						con.push_element read_strong(src,'__')
+					elsif follows =~ /^\_[^\s\_]/
+						con.push_element read_em(src,'_')
+					else # _ is just a normal char
+						con.push_char src.shift_char
+					end
+				end
+			when ?{ # extension
+				src.ignore_char # {
+				interpret_extension(src, con, [?}])
+				src.ignore_char # }
+			when nil
+				# end of input reached without meeting an exit condition
+				maruku_error ("Unclosed span (waiting for %s"+
+				 "#{exit_on_strings.inspect})") % [
+						exit_on_chars ? "#{exit_on_chars.inspect} or" : ""],
+						src,con
+				break
+			else # normal text
+				con.push_char src.shift_char
+			end # end case
+		end # end while true
+		# flush the text still pending in the context
+		con.push_string_if_present 
+
+		# Assign IAL to elements
+		merge_ial(con.elements, src, con)
+		
+		
+		# Remove leading space
+		# (+s+ is the string as it was before trimming, so a first element
+		# consisting of a single space is replaced by "" and not shifted here)
+		if (s = con.elements.first).kind_of? String
+			if s[0] == ?\ then con.elements[0] = s[1, s.size-1] end
+			con.elements.shift if s.size == 0 
+		end
+		
+		# Remove final spaces
+		if (s = con.elements.last).kind_of? String
+			s.chop! if s[-1] == ?\ # "?\ " is the space character
+			con.elements.pop if s.size == 0 
+		end
+		
+		educated = educate(con.elements)
+
+		educated
+	end
+
+
+	def read_xml_instr_span(src, con) 
+		src.ignore_chars(2) # starting <?
+
+		# read target <?target code... ?>
+		target = if m = src.read_regexp(/(\w+)/)
+			m[1]
+		else
+			''
+		end
+		
+		delim = "?>"
+		
+		code = 
+			read_simple(src, escaped=[], break_on_chars=[], 
+			break_on_strings=[delim])
+		
+		src.ignore_chars delim.size
+		
+		code = (code || "").strip
+		con.push_element md_xml_instr(target, code)
+	end
+
+	# Start: cursor on character **after** '{'
+	# End: cursor on '}' or EOF
+	def interpret_extension(src, con, break_on_chars)
+		case src.cur_char
+		when ?:
+			src.ignore_char # :
+			extension_meta(src, con, break_on_chars)
+		when ?#, ?.
+			extension_meta(src, con, break_on_chars)
+		else
+			stuff = read_simple(src, escaped=[?}], break_on_chars, [])
+			if stuff =~ /^(\w+\s|[^\w])/
+				extension_id = $1.strip
+				if false
+				else
+					maruku_recover "I don't know what to do with extension '#{extension_id}'\n"+
+						"I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
+					extension_meta(src, con, break_on_chars)
+				end
+			else 
+				maruku_recover "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
+				extension_meta(src, con, break_on_chars)
+			end
+		end
+	end
+
+	def extension_meta(src, con, break_on_chars)
+		if m = src.read_regexp(/(\w)+\:/)
+			name = m[1]
+			content = m[2]
+			al = read_attribute_list(src, con, break_on_chars)
+			self.doc.ald[name] = al
+		 	con.push md_ald(name, al)
+		else
+			al = read_attribute_list(src, con, break_on_chars)
+			self.doc.ald[name] = al
+			con.push md_ial(al)
+		end
+	end	
+
+	def read_url_el(src,con)
+		src.ignore_char # leading <
+		url = read_simple(src, [], [?>])
+		src.ignore_char # closing >
+		
+		con.push_element md_url(url)
+	end
+
+	def read_email_el(src,con)
+		src.ignore_char # leading <
+		mail = read_simple(src, [], [?>])
+		src.ignore_char # closing >
+		
+		address = mail.gsub(/^mailto:/,'')
+		con.push_element md_email(address)
+	end
+	
+	def read_url(src, break_on)
+		if [?',?"].include? src.cur_char 
+			error 'Invalid char for url', src
+		end
+		
+		url = read_simple(src, [], break_on)
+		if not url # empty url
+			url = ""
+		end
+		
+		if url[0] == ?< && url[-1] == ?>
+			url = url[1, url.size-2]
+		end
+		
+		if url.size == 0 
+			return nil
+		end
+		
+		url
+	end
+	
+	
+	def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
+		case src.cur_char
+		when ?', ?"
+			read_quoted(src, con)
+		else
+			read_simple(src, escaped, exit_on_chars)
+		end
+	end
+	
+	# Tries to read a quoted value. If stream does not
+	# start with ' or ", returns nil.
+	def read_quoted(src, con)
+		case src.cur_char
+			when ?', ?"
+				quote_char = src.shift_char # opening quote
+				string = read_simple(src, EscapedCharInQuotes, [quote_char])
+				src.ignore_char # closing quote
+				return string
+			else 
+#				puts "Asked to read quote from: #{src.cur_chars(10).inspect}"
+				return nil
+		end
+	end
+	
+	# Reads a simple string (no formatting) until one of break_on_chars, 
+	# while escaping the escaped.
+	# If the string is empty, it returns nil.
+	# Raises on error if the string terminates unexpectedly.
+#	# If eat_delim is true, and if the delim is not the EOF, then the delim
+#	# gets eaten from the stream.
+	def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil)
+		text = ""
+		while true
+#			puts "Reading simple #{text.inspect}"
+			c = src.cur_char
+			if exit_on_chars && exit_on_chars.include?(c)
+#				src.ignore_char if eat_delim
+				break
+			end
+			
+			break if exit_on_strings && 
+				exit_on_strings.any? {|x| src.cur_chars_are x}
+			
+			case c
+			when nil
+				s= "String finished while reading (break on "+
+				"#{exit_on_chars.map{|x|""<<x}.inspect})"+
+				" already read: #{text.inspect}"
+				maruku_error s, src
+				maruku_recover "I boldly continue", src
+				break
+			when ?\\
+				d = src.next_char
+				if escaped.include? d
+					src.ignore_chars(2)
+					text << d
+				else
+					text << src.shift_char
+				end
+			else 
+				text << src.shift_char
+			end
+		end
+#		puts "Read simple #{text.inspect}"
+		text.empty? ? nil : text
+	end
+	
+	def read_em(src, delim)
+		src.ignore_char
+		children = read_span(src, EscapedCharInText, nil, [delim])
+		src.ignore_char
+		md_em(children)
+	end
+	
+	def read_strong(src, delim)
+		src.ignore_chars(2)
+		children = read_span(src, EscapedCharInText, nil, [delim])
+		src.ignore_chars(2)
+		md_strong(children)
+	end
+
+	def read_emstrong(src, delim)
+		src.ignore_chars(3)
+		children = read_span(src, EscapedCharInText, nil, [delim])
+		src.ignore_chars(3)
+		md_emstrong(children)
+	end
+	
+	SPACE = ?\ # = 32
+	
+#	R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
+	R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
+	
+	# Reads a bracketed id "[refid]". Consumes also both brackets.
+	def read_ref_id(src, con)
+		src.ignore_char # [
+		src.consume_whitespace
+#		puts "Next: #{src.cur_chars(10).inspect}"
+		if m = src.read_regexp(R_REF_ID) 
+#			puts "Got: #{m[1].inspect} Ignored: #{m[2].inspect}"
+#			puts "Then: #{src.cur_chars(10).inspect}"
+			m[1]
+		else
+			nil
+		end
+	end
+	
+	def read_footnote_ref(src,con)
+		ref = read_ref_id(src,con)
+		con.push_element md_foot_ref(ref)
+	end
+	
+	# Tries to read an inline HTML fragment starting at the cursor.
+	#
+	# The remaining buffer is fed to an HTMLHelper. On success an md_html
+	# element holding what the helper read is pushed onto +con+ and the
+	# same number of characters is skipped in +src+. If the helper consumed
+	# nothing, or raised, the current character is pushed as plain text so
+	# that parsing can go on.
+	def read_inline_html(src, con)
+		h = HTMLHelper.new
+		begin
+			# This is our current buffer in the context
+			start = src.current_remaining_buffer
+			
+			h.eat_this start
+			if not h.is_finished?
+				error "inline_html: Malformed:\n "+
+					"#{start.inspect}\n #{h.inspect}",src,con
+			end
+			
+			# what the helper did not use is in h.rest
+			consumed = start.size - h.rest.size 
+			if consumed > 0
+				con.push_element md_html(h.stuff_you_read)
+				src.ignore_chars(consumed)
+			else
+				puts "HTML helper did not work on #{start.inspect}"
+				con.push_char src.shift_char
+			end
+		# NOTE(review): rescuing Exception also intercepts interrupts and
+		# exit requests -- presumably meant as a catch-all for parser errors
+		rescue Exception => e
+			maruku_error "Bad html: \n" + 
+				add_tabs(e.inspect+e.backtrace.join("\n"),1,'>'),
+				src,con
+			maruku_recover "I will try to continue after bad HTML.", src, con
+			con.push_char src.shift_char
+		end
+	end
+	
+	def read_inline_code(src, con)
+		# Count the number of ticks
+		num_ticks = 0
+		while src.cur_char == ?` 
+			num_ticks += 1
+			src.ignore_char
+		end
+		# We will read until this string
+		end_string = "`"*num_ticks
+
+		code = 
+			read_simple(src, escaped=[], break_on_chars=[], 
+				break_on_strings=[end_string])
+		
+#		puts "Now I expects #{num_ticks} ticks: #{src.cur_chars(10).inspect}"
+		src.ignore_chars num_ticks
+		
+		# Ignore at most one space
+		if num_ticks > 1 && code[0] == SPACE
+			code = code[1, code.size-1]
+		end
+		
+		# drop last space 
+		if num_ticks > 1 && code[-1] == SPACE
+			code = code[0,code.size-1]
+		end
+
+#		puts "Read `` code: #{code.inspect}; after: #{src.cur_chars(10).inspect} "
+		con.push_element md_code(code)
+	end
+	
+	# Reads a link starting at the opening bracket and pushes the result
+	# onto +con+. Three forms are handled after the bracketed text:
+	#
+	# * "(url "title")" -- immediate link, pushed as md_im_link
+	# * "[ref_id]"      -- reference link, pushed as md_link
+	# * nothing         -- pushed as md_link with an empty reference id
+	#
+	# When the closing ')' or the reference id cannot be read, the problem
+	# is reported and the bracketed text is pushed without creating a link.
+	def read_link(src, con)
+		# we read the string and see what happens
+		src.ignore_char # opening bracket
+		children = read_span(src, EscapedCharInText, [?]])
+		src.ignore_char # closing bracket
+
+		# ignore space
+		# (a single blank between "]" and the following "[" or "(")
+		if src.cur_char == SPACE and 
+			(src.next_char == ?[ or src.next_char == ?( )
+			src.shift_char
+		end
+		
+		case src.cur_char
+		when ?(
+			src.ignore_char # opening (
+			src.consume_whitespace
+			url = read_url(src, [SPACE,?\t,?)])
+			if not url
+				url = '' # no url is ok
+			end
+			src.consume_whitespace
+			title = nil
+			if src.cur_char != ?) # we have a title
+				# remembered to rebuild titles that contain the quote char
+				quote_char = src.cur_char
+				title = read_quoted(src,con)
+				
+				if not title
+					maruku_error 'Must quote title',src,con
+				else
+					# Tries to read a title with quotes: ![a](url "ti"tle")
+					# this is the most ugly thing in Markdown
+					if not src.next_matches(/\s*\)/)
+						# if there is not a closing par ), then read
+						# the rest and guess it's title with quotes
+						rest = read_simple(src, escaped=[], break_on_chars=[?)], 
+							break_on_strings=[])
+						# chop the closing char
+						# NOTE(review): read_simple returns nil when it reads
+						# nothing; presumably not reachable here -- confirm
+						rest.chop!
+						title << quote_char << rest
+					end
+				end
+			end
+			src.consume_whitespace
+			closing = src.shift_char # closing )
+			if closing != ?)
+				maruku_error 'Unclosed link',src,con
+				maruku_recover "No closing ): I will not create"+
+				" the link for #{children.inspect}", src, con
+				con.push_elements children
+				return
+			end
+			con.push_element md_im_link(children,url, title)
+		when ?[ # link ref
+			ref_id = read_ref_id(src,con)
+			if ref_id
+				con.push_element md_link(children, ref_id)
+			else 
+				maruku_error "Could not read ref_id", src, con
+				maruku_recover "I will not create the link for "+
+					"#{children.inspect}", src, con
+				con.push_elements children
+				return
+			end
+		else # empty [link]
+			con.push_element md_link(children, "")
+		end
+	end # read link
+
+	def read_image(src, con)
+		src.ignore_chars(2) # opening "!["
+		alt_text = read_span(src, EscapedCharInText, [?]])
+		src.ignore_char # closing bracket
+		# ignore space
+		if src.cur_char == SPACE and 
+			(src.next_char == ?[ or src.next_char == ?( )
+			src.ignore_char
+		end
+		case src.cur_char
+		when ?(
+			src.ignore_char # opening (
+			src.consume_whitespace
+			url = read_url(src, [SPACE,?\t,?)])
+			if not url
+				error "Could not read url from #{src.cur_chars(10).inspect}",
+					src,con
+			end
+			src.consume_whitespace
+			title = nil
+			if src.cur_char != ?) # we have a title
+				quote_char = src.cur_char
+				title = read_quoted(src,con)
+				if not title
+					maruku_error 'Must quote title',src,con
+				else				
+					# Tries to read a title with quotes: ![a](url "ti"tle")
+					# this is the most ugly thing in Markdown
+					if not src.next_matches(/\s*\)/)
+						# if there is not a closing par ), then read
+						# the rest and guess it's title with quotes
+						rest = read_simple(src, escaped=[], break_on_chars=[?)], 
+							break_on_strings=[])
+						# chop the closing char
+						rest.chop!
+						title << quote_char << rest
+					end
+				end
+			end
+			src.consume_whitespace
+			closing = src.shift_char # closing )
+			if closing != ?)
+				error ("Unclosed link: '"<<closing<<"'")+
+					" Read url=#{url.inspect} title=#{title.inspect}",src,con
+			end
+			con.push_element md_im_image(alt_text, url, title)
+		when ?[ # link ref
+			ref_id = read_ref_id(src,con)
+			con.push_element md_image(alt_text, ref_id)
+		else # no stuff
+			con.push_elements children
+		end
+	end # read link
+
+
+	class SpanContext 
+		include MaRuKu::Strings
+	
+		# Read elements
+		attr_accessor :elements
+		attr_accessor :cur_string
+	
+		def initialize
+			@elements = []
+			@cur_string = ""
+		end
+	
+		def push_element(e)
+			raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} " if
+			 not (e.kind_of?(String) or e.kind_of?(MDElement))
+		
+			push_string_if_present
+			@elements << e
+			nil
+		end
+		alias push push_element
+		
+		def push_elements(a)
+			for e in a 
+				if e.kind_of? String
+					e.each_byte do |b| push_char b end
+				else
+					push_element e
+				end
+			end
+		end
+		def push_string_if_present
+			if @cur_string.size > 0
+				@elements << @cur_string
+				@cur_string = ""
+			end
+			nil
+		end
+	
+		def push_char(c)
+			@cur_string << c 
+			nil
+		end
+	
+		# push space into current string if
+		# there isn't one
+		def push_space
+			last = @cur_string[@cur_string.size-1]
+			@cur_string << ?\  if last != ?\ 
+		end
+	
+		def describe
+			lines = @elements.map{|x| x.inspect}.join("\n")
+			s = "Elements read in span: \n" +
+			add_tabs(lines,1, ' -')+"\n"
+		
+			if @cur_string.size > 0
+			s += "Current string: \n  #{@cur_string.inspect}\n" 
+			end
+			s
+		end
+	end # SpanContext
+	
+end end end end # module MaRuKu; module In; module Markdown; module SpanLevelParser
+
--- a/lib/maruku/input/rubypants.rb
+++ b/lib/maruku/input/rubypants.rb
@ -0,0 +1,225 @@
+#--
+#   Copyright (C) 2006  Andrea Censi  <andrea (at) rubyforge.org>
+#
+# This file is part of Maruku.
+# 
+#   Maruku is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+# 
+#   Maruku is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+# 
+#   You should have received a copy of the GNU General Public License
+#   along with Maruku; if not, write to the Free Software
+#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#++
+
+#
+# NOTA BENE: 
+#
+# The following algorithm is a rip-off of RubyPants written by 
+# Christian Neukirchen. 
+#
+# RubyPants is a Ruby port of SmartyPants written by John Gruber.
+#
+# This file is distributed under the GPL, which I guess is compatible
+# with the terms of the RubyPants license.
+#
+# -- Andrea Censi
+
+
+# = RubyPants -- SmartyPants ported to Ruby
+#
+# Ported by Christian Neukirchen <mailto:chneukirchen@gmail.com>
+#   Copyright (C) 2004 Christian Neukirchen
+#
+# Incorporates ideas, comments and documentation by Chad Miller
+#   Copyright (C) 2004 Chad Miller
+#
+# Original SmartyPants by John Gruber
+#   Copyright (C) 2003 John Gruber
+#
+
+#
+# = RubyPants -- SmartyPants ported to Ruby
+#
+#
+# [snip]
+#
+# == Authors
+# 
+# John Gruber did all of the hard work of writing this software in
+# Perl for Movable Type and almost all of this useful documentation.
+# Chad Miller ported it to Python to use with Pyblosxom.
+#
+# Christian Neukirchen provided the Ruby port, as a general-purpose
+# library that follows the *Cloth API.
+# 
+#
+# == Copyright and License
+# 
+# === SmartyPants license:
+# 
+# Copyright (c) 2003 John Gruber
+# (http://daringfireball.net)
+# All rights reserved.
+# 
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# 
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# 
+# * Neither the name "SmartyPants" nor the names of its contributors
+#   may be used to endorse or promote products derived from this
+#   software without specific prior written permission.
+# 
+# This software is provided by the copyright holders and contributors
+# "as is" and any express or implied warranties, including, but not
+# limited to, the implied warranties of merchantability and fitness
+# for a particular purpose are disclaimed. In no event shall the
+# copyright owner or contributors be liable for any direct, indirect,
+# incidental, special, exemplary, or consequential damages (including,
+# but not limited to, procurement of substitute goods or services;
+# loss of use, data, or profits; or business interruption) however
+# caused and on any theory of liability, whether in contract, strict
+# liability, or tort (including negligence or otherwise) arising in
+# any way out of the use of this software, even if advised of the
+# possibility of such damage.
+# 
+# === RubyPants license
+# 
+# RubyPants is a derivative work of SmartyPants and smartypants.py.
+# 
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# 
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in
+#   the documentation and/or other materials provided with the
+#   distribution.
+# 
+# This software is provided by the copyright holders and contributors
+# "as is" and any express or implied warranties, including, but not
+# limited to, the implied warranties of merchantability and fitness
+# for a particular purpose are disclaimed. In no event shall the
+# copyright owner or contributors be liable for any direct, indirect,
+# incidental, special, exemplary, or consequential damages (including,
+# but not limited to, procurement of substitute goods or services;
+# loss of use, data, or profits; or business interruption) however
+# caused and on any theory of liability, whether in contract, strict
+# liability, or tort (including negligence or otherwise) arising in
+# any way out of the use of this software, even if advised of the
+# possibility of such damage.
+# 
+#
+# == Links
+#
+# John Gruber:: http://daringfireball.net
+# SmartyPants:: http://daringfireball.net/projects/smartypants
+#
+# Chad Miller:: http://web.chad.org
+#
+# Christian Neukirchen:: http://kronavita.de/chris
+
+
+module MaRuKu; module In; module Markdown; module SpanLevelParser
+	# Source of a character class matching one ASCII punctuation character;
+	# interpolated into the quote rules below.
+	Punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'
+	# Source of a character class matching any character except whitespace,
+	# backslash, an opening bracket/brace/parenthesis or a hyphen.
+	Close_class = %![^\ \t\r\n\\[\{\(\-]!
+
+	# Ordered list of [pattern, substitution] pairs applied by +educate+.
+	# A pattern is a Regexp or a literal String; a substitution is an entity
+	# name or a list of them, where :one stands for the text of the
+	# pattern's first capture group. Order matters: each rule is applied to
+	# the output of the previous ones.
+	Rules = [
+		[/---/,   :mdash          ],
+		[/--/,    :ndash          ],
+		['...',   :hellip         ],
+		['. . .', :hellip         ],
+		["``",    :ldquo          ],
+		["''",    :rdquo          ],
+		[/<<\s/,  [:laquo, :nbsp] ],
+		[/\s>>/,  [:nbsp, :raquo] ],
+		[/<</,    :laquo          ],
+		[/>>/,    :raquo          ],
+		
+#		def educate_single_backticks(str)
+#		["`", :lsquo]
+#		["'", :rsquo]
+
+		# Special case if the very first character is a quote followed by
+		# punctuation at a non-word-break. Close the quotes by brute
+		# force:
+		[/^'(?=#{Punct_class}\B)/, :rsquo],
+		[/^"(?=#{Punct_class}\B)/, :rdquo],
+		# Special case for double sets of quotes, e.g.:
+		#   <p>He said, "'Quoted' words in a larger quote."</p>
+		[/"'(?=\w)/, [:ldquo, :lsquo]    ],
+		[/'"(?=\w)/, [:lsquo, :ldquo]    ],
+		# Special case for decade abbreviations (the '80s):
+		[/'(?=\d\ds)/, :rsquo            ],
+		# Get most opening single quotes:
+		[/(\s)'(?=\w)/, [:one, :lsquo]   ],
+		# Single closing quotes:
+		[/(#{Close_class})'/, [:one, :rsquo]],
+		[/'(\s|s\b|$)/, [:rsquo, :one]],
+		# Any remaining single quotes should be opening ones:
+		[/'/, :lsquo],
+		# Get most opening double quotes:
+		[/(\s)"(?=\w)/, [:one, :ldquo]],
+		# Double closing quotes:
+		[/(#{Close_class})"/, [:one, :rdquo]],
+		[/"(\s|s\b|$)/, [:rdquo, :one]],
+		# Any remaining quotes should be opening ones:
+		[/"/, :ldquo]
+	].
+	# Normalization: literal strings become escaped Regexps and single
+	# substitutions become one-element arrays.
+	map{|reg, subst| # People should do the thinking, machines should do the work.
+		reg = Regexp.new(Regexp.escape(reg)) if not reg.kind_of? Regexp
+		subst = [subst] if not subst.kind_of?Array
+		[reg, subst]}
+
+# note: input will be destroyed
+def apply_one_rule(reg, subst, input)
+	output = []
+	while first = input.shift
+		if first.kind_of?(String) && (m = reg.match(first))
+			output.push    m. pre_match if m. pre_match.size > 0
+			 input.unshift m.post_match if m.post_match.size > 0
+			subst.reverse.each do |x|
+				input.unshift( x == :one ? m[1] : md_entity(x.to_s) ) end
+		else
+			output.push first
+		end
+	end
+	return output
+end
+	
+def educate(elements)
+	Rules.each do |reg, subst|
+		elements = apply_one_rule(reg, subst, elements)
+	end
+	# strips empty strings
+	elements.delete_if {|x| x.kind_of?(String) && x.size == 0}
+	final = []
+	# join consecutive strings
+	elements.each do |x|
+		if x.kind_of?(String) && final.last.kind_of?(String)
+			final.last << x
+		else
+			final << x
+		end
+	end
+	return final
+end
+
+end end end end
--- a/lib/maruku/input/type_detection.rb
+++ b/lib/maruku/input/type_detection.rb
@ -0,0 +1,141 @@
+#--
+#   Copyright (C) 2006  Andrea Censi  <andrea (at) rubyforge.org>
+#
+# This file is part of Maruku.
+# 
+#   Maruku is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+# 
+#   Maruku is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+# 
+#   You should have received a copy of the GNU General Public License
+#   along with Maruku; if not, write to the Free Software
+#   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#++
+
+class String
+	include MaRuKu::Strings
+	def md_type()
+		@md_type ||= line_md_type(self)
+	end
+end
+
+class NilClass
+	def md_type() nil end
+	
+end
+
+# This code does the classification of lines for block-level parsing.
+module MaRuKu; module Strings
+	
+	def line_md_type(l)
+		# The order of evaluation is important (:text is a catch-all)
+		return :text   if l =~ /^[a-zA-Z]/
+		return :code             if number_of_leading_spaces(l)>=4
+		return :empty    if l =~ /^\s*$/
+		return :footnote_text    if l =~ FootnoteText
+		return :ref_definition   if l =~ LinkRegex or l=~ IncompleteLink
+		return :abbreviation     if l =~ Abbreviation
+		return :definition       if l =~ Definition
+		# I had a bug with emails and urls at the beginning of the 
+		# line that were mistaken for raw_html
+		return :text     if l=~EMailAddress or l=~ URL
+		# raw html is like PHP Markdown Extra: at most three spaces before
+		return :xml_instr if l =~ %r{^\s*<\?}
+		return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
+		return :raw_html if l =~ %r{[ ]{0,3}<\!\-\-}
+		return :ulist    if l =~ /^\s?([\*\-\+])\s+.*\w+/
+		return :olist    if l =~ /^\s?\d+\..*\w+/
+		return :header1  if l =~ /^(=)+/ 
+		return :header2  if l =~ /^([-\s])+$/ 
+		return :header3  if l =~ /^(#)+\s*\S+/ 
+		# at least three asterisks on a line, and only whitespace
+		return :hrule    if l =~ /^(\s*\*\s*){3,1000}$/ 
+		return :hrule    if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens
+		return :hrule    if l =~ /^(\s*_\s*){3,1000}$/ # or underscores	
+		return :quote    if l =~ /^>/
+		return :metadata if l =~ /^@/
+#		if @@new_meta_data?
+			return :ald   if l =~ AttributeDefinitionList
+			return :ial   if l =~ InlineAttributeList
+#		end
+#		return :equation_end if l =~ EquationEnd
+		return :text # else, it's just text
+	end
+
+		
+	# $1 = id   $2 = attribute list
+	AttributeDefinitionList = /^\s{0,3}\{([\w\d\s]+)\}:\s*(.*)\s*$/
+	# 
+	InlineAttributeList = /^\s{0,3}\{(.*)\}\s*$/
+	# Example:
+	#     ^:blah blah
+	#     ^: blah blah
+	#     ^   : blah blah
+	Definition = %r{ 
+		^ # begin of line
+		[ ]{0,3} # up to 3 spaces
+		: # colon
+		\s* # whitespace
+		(\S.*) # the text    = $1
+		$ # end of line
+	}x
+
+	# Example:
+	#     *[HTML]: Hyper Text Markup Language
+	Abbreviation = %r{
+		^  # begin of line
+		\* # one asterisk
+		\[ # opening bracket
+		([^\]]+) # any non-closing bracket:  id = $1
+		\] # closing bracket
+		:  # colon
+		\s* # whitespace
+		(\S.*\S)* #           definition=$2
+		\s* # strip this whitespace
+		$   # end of line
+	}x
+
+	FootnoteText = %r{
+		^\s*\[(\^.+)\]: # id = $1 (including '^')
+		\s*(\S.*)?$    # text = $2 (not obb.)
+	}x
+
+	# This regex is taken from BlueCloth sources
+	# Link defs are in the form: ^[id]: \n? url "optional title"
+	LinkRegex = %r{
+		^[ ]*\[([^\]]+)\]:		# id = $1
+		  [ ]*
+		<?(\S+)>?				# url = $2
+		  [ ]*
+		(?:# Titles are delimited by "quotes" or (parens).
+			["(']
+			(.+?)			# title = $3
+			[")']			# Matching ) or "
+			\s*(.+)?   # stuff = $4
+		)?	# title is optional
+	  }x
+
+	IncompleteLink = %r{^\s*\[(.+)\]:\s*$}
+
+	HeaderWithId = /^(.*)\{\#([\w_-]+)\}\s*$/
+
+	HeaderWithAttributes = /^(.*)\{(.*)\}\s*$/
+
+
+	# if contains a pipe, it could be a table header
+	MightBeTableHeader = %r{\|}
+	# -------------:
+	Sep = /\s*(\:)?\s*-+\s*(\:)?\s*/
+	# | -------------:| ------------------------------ |
+	TableSeparator = %r{^(\|?#{Sep}\|?)+\s*$}
+
+
+	EMailAddress = /<([^:]+@[^:]+)>/
+	URL = /^<http:/
+end end