Moved Maruku (and its dependencies) and XHTMLDiff (and its dependencies) to vendor/plugins/ .

Synced with Instiki SVN.
This commit is contained in:
Jacques Distler 2007-02-10 23:03:15 -06:00
parent 64037c67ac
commit 63e217bcfd
59 changed files with 40 additions and 1 deletions

133
vendor/plugins/maruku/lib/maruku.rb vendored Normal file
View file

@ -0,0 +1,133 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'rexml/document'
# :include:MaRuKu.txt
module MaRuKu
  # Input side. Only Markdown is declared; the parser modules are empty
  # here and are reopened by the files that implement them.
  module In
    module Markdown
      module SpanLevelParser
      end

      module BlockLevelParser
      end
    end
    # more to come?
  end

  # Output side: one (initially empty) module per export format.
  module Out
    # Functions for exporting to MarkDown.
    module Markdown
    end

    # Functions for exporting to HTML.
    module HTML
    end

    # Functions for exporting to Latex
    module Latex
    end
  end

  # These are strings utilities.
  module Strings
  end

  module Helpers
  end

  module Errors
  end

  # Base element class: mixes in REXML, the MaRuKu namespace itself,
  # every exporter and the utility modules declared above.
  class MDElement
    include REXML
    include MaRuKu
    include Out::Markdown
    include Out::HTML
    include Out::Latex
    include Strings
    include Helpers
    include Errors
  end

  # A document is an element that additionally mixes in the Markdown parsers.
  class MDDocument < MDElement
    include In::Markdown
    include In::Markdown::SpanLevelParser
    include In::Markdown::BlockLevelParser
  end
end
# This is the public interface: Maruku is an MDDocument with nothing
# added at this point.
class Maruku < MaRuKu::MDDocument
end
require 'rexml/document'
# Structures definition
require 'maruku/structures'
require 'maruku/structures_inspect'
require 'maruku/defaults'
# Less typing
require 'maruku/helpers'
# Code for parsing whole Markdown documents
require 'maruku/input/parse_doc'
# Ugly things kept in a closet
require 'maruku/string_utils'
require 'maruku/input/linesource'
require 'maruku/input/type_detection'
# A class for reading and sanitizing inline HTML
require 'maruku/input/html_helper'
# Code for parsing Markdown block-level elements
require 'maruku/input/parse_block'
# Code for parsing Markdown span-level elements
require 'maruku/input/charsource'
require 'maruku/input/parse_span_better'
require 'maruku/input/rubypants'
require 'maruku/input/extensions'
require 'maruku/attributes'
require 'maruku/structures_iterators'
require 'maruku/errors_management'
# Code for creating a table of contents
require 'maruku/toc'
# Version and URL
require 'maruku/version'
# Exporting to html
require 'maruku/output/to_html'
# Exporting to latex
require 'maruku/output/to_latex'
require 'maruku/output/to_latex_strings'
require 'maruku/output/to_latex_entities'
# Pretty print
require 'maruku/output/to_markdown'
# Exporting to text: strips all formatting (not complete)
require 'maruku/output/to_s'
# class Maruku is the global interface
require 'maruku/maruku'

View file

@ -0,0 +1,462 @@
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
# NOTE: this is the old span-level regexp-based parser.
#
# The new parser is a real parser and is defined with functions in parse_span_better.rb
# The new parser is faster, handles syntax errors, but it's absolutely not readable.
#
# Also, regexp parsers simply CANNOT handle inline HTML properly.
# There are two black-magic methods `match_couple_of` and `map_match`,
# defined at the end of the file, that make the function
# `parse_lines_as_span` so elegant.
class Maruku
# Takes care of all span-level formatting, links, images, etc.
#
# Lines must not contain block-level elements.
#
# lines:: the lines to parse; they are first joined by +resolve_linebreaks+.
#
# Every step below rewrites the children of a temporary :dummy element.
# Returns those children (Strings and MDElements) once all steps have run.
def parse_lines_as_span(lines)
  # first, get rid of linebreaks
  res = resolve_linebreaks(lines)

  span = MDElement.new(:dummy, res)

  # encode all escapes
  span.replace_each_string { |s| s.escape_md_special }

  # The order of processing is significant:
  # 1. inline code
  # 2. immediate links
  # 3. inline HTML
  # 4. everything else

  # search for ``code`` markers
  span.match_couple_of('``') { |children, match1, match2|
    e = create_md_element(:inline_code)
    # this is now opaque to processing
    e.meta[:raw_code] = children.join('').it_was_a_code_block
    e
  }

  # Search for `single tick` code markers
  span.match_couple_of('`') { |children, match1, match2|
    e = create_md_element(:inline_code)
    # this is now opaque to processing
    e.meta[:raw_code] = children.join('').it_was_a_code_block
    e
  }

  # Detect any immediate link: <http://www.google.com>
  # we expect an http: or something: at the beginning
  span.map_match( /<(\w+:[^\>]+)>/) { |match|
    url = match[1]
    e = create_md_element(:immediate_link, [])
    e.meta[:url] = url
    e
  }

  # Search for inline HTML (the support is pretty basic for now)

  # this searches for a matching block
  inlineHTML1 = %r{
    ( # put everything in 1
    < # open
    (\w+) # opening tag in 2
    > # close
    .* # anything
    </\2> # match closing tag
    )
  }x

  # this searches for only one block
  inlineHTML2 = %r{
    ( # put everything in 1
    < # open
    \w+ #
    # close
    [^<>]* # anything except
    /> # closing tag
    )
  }x

  [inlineHTML1, inlineHTML2].each do |reg|
    span.map_match(reg) { |match|
      raw_html = match[1]
      convert_raw_html_in_list(raw_html)
    }
  end

  # Detect footnotes references: [^1]
  span.map_match(/\[(\^[^\]]+)\]/) { |match|
    id = match[1].strip.downcase
    e = create_md_element(:footnote_reference)
    e.meta[:footnote_id] = id
    e
  }

  # Detect any image like ![Alt text][url]
  span.map_match(/\!\[([^\]]+)\]\s?\[([^\]]*)\]/) { |match|
    alt = match[1]
    id = match[2].strip.downcase
    # An empty id, as in "![Alt text][]", means the alt text is the id.
    if id.size == 0
      id = alt.strip.downcase
    end
    e = create_md_element(:image)
    e.meta[:ref_id] = id
    e
  }

  # Detect any image with immediate url: ![Alt](url "title")
  # a dummy ref is created and put in the symbol table
  link1 = /!\[([^\]]+)\]\s?\(([^\s\)]*)(?:\s+["'](.*)["'])?\)/
  span.map_match(link1) { |match|
    alt = match[1]
    url = match[2]
    title = match[3]

    url = url.strip
    # create a dummy id
    id="dummy_#{@refs.size}"
    @refs[id] = {:url=>url, :title=>title}

    e = create_md_element(:image)
    e.meta[:ref_id] = id
    e
  }

  # an id reference: "[id]", "[ id ]"
  reg_id_ref = %r{
    \[ # opening bracket
    ([^\]]*) # 0 or more non-closing bracket (this is too permissive)
    \] # closing bracket
  }x

  # A url: any run of characters that are not whitespace, `]` or `)`.
  # Only $1 is set, to the url.
  reg_url = %r{([^\s\]\)]+)}

  # [bah](http://www.google.com "Google.com"),
  # [bah](http://www.google.com),
  # [empty]()
  reg_url_and_title = %r{
    \( # opening
    \s* # whitespace
    #{reg_url}? # url = 1 might be empty
    (?:\s+["'](.*)["'])? # optional title = 2
    \s* # whitespace
    \) # closing
  }x

  # Detect a link like [text][id]
  span.map_match(/\[([^\]]+)\]\s?\[([^\]]*)\]/) { |match|
    text = match[1]
    id = match[2].strip.downcase

    if id.size == 0
      id = text.strip.downcase
    end

    # The text is a single line; wrap it so that it is handled as a list
    # of lines like any other input of this method.
    children = parse_lines_as_span([text])
    e = create_md_element(:link, children)
    e.meta[:ref_id] = id
    e
  }

  # Builds a :link from text matching `link1`.
  # NOTE(review): `link1` begins with `!`, exactly like the image pattern
  # applied earlier, so image syntax found in plain strings has already been
  # replaced when this step runs -- TODO confirm whether the `!` is intended.
  # a dummy ref is created and put in the symbol table
  link1 = /!\[([^\]]+)\]\s?\(([^\s\)]*)(?:\s+["'](.*)["'])?\)/
  span.map_match(link1) { |match|
    text = match[1]
    children = parse_lines_as_span([text])

    url = match[2]
    title = match[3]

    url = url.strip
    # create a dummy id
    id="dummy_#{@refs.size}"
    @refs[id] = {:url=>url, :title=>title}
    @refs[id][:title] = title if title

    e = create_md_element(:link, children)
    e.meta[:ref_id] = id
    e
  }

  # Detect any link like [Google engine][google]
  span.match_couple_of('[', # opening bracket
    %r{\] # closing bracket
      [ ]? # optional whitespace
      #{reg_id_ref} # ref id, with $1 being the reference
    }x
  ) { |children, match1, match2|
    id = match2[1]
    id = id.strip.downcase

    if id.size == 0
      id = children.join.strip.downcase
    end

    e = create_md_element(:link, children)
    e.meta[:ref_id] = id
    e
  }

  # Detect any link with immediate url: [Google](http://www.google.com)
  # XXX Note that the url can be empty: [Empty]()
  # a dummy ref is created and put in the symbol table
  span.match_couple_of('[', # opening bracket
    %r{\] # closing bracket
      [ ]? # optional whitespace
      #{reg_url_and_title} # ref id, with $1 being the url and $2 being the title
    }x
  ) { |children, match1, match2|
    url = match2[1]
    # The closing pattern has exactly two captures: url (1) and title (2).
    title = match2[2]
    # create a dummy id
    id="dummy_#{@refs.size}"
    @refs[id] = {:url=>url}
    @refs[id][:title] = title if title

    e = create_md_element(:link, children)
    e.meta[:ref_id] = id
    e
  }

  # Detect an email address <andrea@invalid.it>
  span.map_match(EMailAddress) { |match|
    email = match[1]
    e = create_md_element(:email_address, [])
    e.meta[:email] = email
    e
  }

  # Detect HTML entities
  span.map_match(/&([\w\d]+);/) { |match|
    entity_name = match[1]

    e = create_md_element(:entity, [])
    e.meta[:entity_name] = entity_name
    e
  }

  # And now the easy stuff

  # search for ***strong and em***
  span.match_couple_of('***') { |children,m1,m2|
    create_md_element(:strong, [create_md_element(:emphasis, children)] ) }

  span.match_couple_of('___') { |children,m1,m2|
    create_md_element(:strong, [create_md_element(:emphasis, children)] ) }

  # search for **strong**
  span.match_couple_of('**') { |children,m1,m2| create_md_element(:strong, children) }

  # search for __strong__
  span.match_couple_of('__') { |children,m1,m2| create_md_element(:strong, children) }

  # search for *emphasis*
  span.match_couple_of('*') { |children,m1,m2| create_md_element(:emphasis, children) }

  # search for _emphasis_
  span.match_couple_of('_') { |children,m1,m2| create_md_element(:emphasis, children) }

  # finally, unescape the special characters
  span.replace_each_string { |s| s.unescape_md_special}

  span.children
end
# Joins consecutive stripped lines with a single space and starts a new
# string after every line for which +force_linebreak?+ is true.
#
# Returns an array containing Strings and :linebreak elements.
def resolve_linebreaks(lines)
  pieces = []
  buffer = ""
  lines.each do |line|
    stripped = line.strip
    buffer = buffer.size > 0 ? "#{buffer} #{stripped}" : stripped
    next unless force_linebreak?(line)

    pieces << buffer
    pieces << create_md_element(:linebreak)
    buffer = ""
  end
  pieces << buffer if buffer.size > 0
  pieces
end
# Wraps a fragment of inline HTML in a :raw_html element.
#
# raw_html is something like
# <em> A</em> dopwkk *maruk* <em>A</em>
#
# The text is always stored under meta[:raw_html]. It is also parsed with
# Document.new; the result goes under meta[:parsed_html], and when parsing
# raises, a message is written to $stderr and that key is left unset.
def convert_raw_html_in_list(raw_html)
  node = create_md_element(:raw_html)
  node.meta[:raw_html] = raw_html
  begin
    parsed = Document.new(raw_html)
    node.meta[:parsed_html] = parsed
  rescue
    $stderr.puts "convert_raw_html_in_list Malformed HTML:\n#{raw_html}"
  end
  node
end
end
# And now the black magic that makes the part above so elegant
class MDElement
# Applies +regexp+ repeatedly to every string in the hierarchy (through
# `replace_each_string`). Each match is replaced by whatever the block
# returns for it; the text around the matches is kept:
#
#   ..., matched_string, ... -> ..., pre_match, block.call(match), post_match
#
# The block may return a single item or an array of items.
def map_match(regexp, &block)
  replace_each_string do |text|
    pieces = []
    rest = text
    while (found = regexp.match(rest))
      # keep the text before the match, if any
      before = found.pre_match
      pieces << before if before && before.size > 0
      # splice in the transformed match
      pieces.concat([*block.call(found)])
      # continue with what follows the match
      rest = found.post_match
    end
    pieces << rest if rest.size > 0
    pieces
  end
end
# Finds couple of delimiters in a hierarchy of Strings and MDElements
#
# Open and close are two delimiters (like '[' and ']'), or two Regexp.
#
# If you don't pass close, it defaults to open.
#
# Each block is called with |contained children, match1, match2|
#
# The value returned by the block is stored in place of the delimited run.
# @children is consumed as a queue and replaced at the end by the rebuilt
# list, which is also the return value.
def match_couple_of(open, close=nil, &block)
close = close || open
# String delimiters are escaped and compiled; Regexps are used as given.
open_regexp = open.kind_of?(Regexp) ? open : Regexp.new(Regexp.escape(open))
close_regexp = close.kind_of?(Regexp) ? close : Regexp.new(Regexp.escape(close))
# Do the same to children first
for c in @children; if c.kind_of? MDElement
c.match_couple_of(open_regexp, close_regexp, &block)
end end
processed_children = []
# Main scan: items are shifted off @children one at a time; leftovers of a
# split string are unshifted so that they are examined again.
until @children.empty?
c = @children.shift
if c.kind_of? String
match1 = open_regexp.match(c)
if not match1
processed_children << c
else # we found opening, now search closing
# puts "Found opening (#{marker}) in #{c.inspect}"
# pre match is processed
processed_children.push match1.pre_match if
match1.pre_match && match1.pre_match.size > 0
# we will process again the post_match
@children.unshift match1.post_match if
match1.post_match && match1.post_match.size>0
contained = []; found_closing = false
# Collect everything up to the closing delimiter; non-String items met
# on the way are collected unchanged.
until @children.empty? || found_closing
c = @children.shift
if c.kind_of? String
match2 = close_regexp.match(c)
if not match2
contained << c
else
# we found closing
found_closing = true
# pre match is contained
contained.push match2.pre_match if
match2.pre_match && match2.pre_match.size>0
# we will process again the post_match
@children.unshift match2.post_match if
match2.post_match && match2.post_match.size>0
# And now we call the block
substitute = block.call(contained, match1, match2)
processed_children << substitute
# puts "Found closing (#{marker}) in #{c.inspect}"
# puts "Children: #{contained.inspect}"
# puts "Substitute: #{substitute.inspect}"
end
else
contained << c
end
end
# No closing delimiter: keep the opening text as a plain string and put
# the collected items back at the front of the queue in their original order.
if not found_closing
# $stderr.puts "##### Could not find closing for #{open}, #{close} -- ignoring"
processed_children << match1.to_s
contained.reverse.each do |c|
@children.unshift c
end
end
end
else
processed_children << c
end
end
raise "BugBug" unless @children.empty?
rebuilt = []
# rebuild strings
# (a String that follows another String is appended to it in place)
processed_children.each do |c|
if c.kind_of?(String) && rebuilt.last && rebuilt.last.kind_of?(String)
rebuilt.last << c
else
rebuilt << c
end
end
@children = rebuilt
end
end

View file

@ -0,0 +1,226 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
class String
  # Returns the +inspect+ form of the string when it contains whitespace
  # or a quote character (single or double); otherwise returns the
  # receiver itself.
  def quote_if_needed
    return inspect if self =~ /[\s\'\"]/
    self
  end
end
module MaRuKu
  MagicChar = ':'

  # An ordered list of attributes, stored as [key, value] pairs.
  #
  # An attribute list becomes
  #   {#id .cl key="val" ref}
  #   [ [:id, 'id'], [:class, 'id'], ['key', 'val'], [ :ref, 'ref' ]]
  #
  # Array#push is made private: entries are added through the push_*
  # methods below, each of which returns the list itself.
  class AttributeList < Array
    private :push

    # Appends a key/value attribute.
    # Raises RuntimeError when the key is missing but a value is given.
    # NOTE(review): `not key and val` parses as `(not key) and val`, so a
    # key with a nil value is accepted -- TODO confirm this is intended.
    def push_key_val(key, val)
      raise "Bad #{key.inspect}=#{val.inspect}" if !key && val
      push [key, val]
    end

    # Appends a reference to another attribute list; a copy of +ref_id+ is
    # stored. Raises RuntimeError when +ref_id+ is nil or false.
    def push_ref(ref_id)
      raise "Bad :ref #{ref_id.inspect}" if not ref_id
      push [:ref, ref_id+""]
    end

    # Appends a class attribute.
    # Raises RuntimeError when +val+ is nil or false.
    def push_class(val)
      raise "Bad :class #{val.inspect}" if not val
      push [:class, val]
    end

    # Appends an id attribute.
    # Raises RuntimeError when +val+ is nil or false.
    def push_id(val)
      raise "Bad :id #{val.inspect}" if not val
      push [:id, val]
    end

    # Renders the list in source form, quoting keys and values when needed:
    #   #id .class ref key=val
    def to_s
      map { |k, v|
        case k
        when :id; "#" + v.quote_if_needed
        when :class; "." + v.quote_if_needed
        when :ref; v.quote_if_needed
        else k.quote_if_needed + "=" + v.quote_if_needed
        end
      }.join(' ')
    end
    alias to_md to_s
  end
end
module MaRuKu; module In; module Markdown; module SpanLevelParser
# Builds the test cases for attribute-list parsing.
#
# Each row of the table is [source, expected, comment]. `expected` is either
# an array of attribute pairs or :throw. A row that omits `expected` or
# `comment` reuses the value of the closest previous row that set it
# (remembered in @expected and @comment).
#
# Returns an array of [wrapped_source, expected, description] triples, where
# the source is wrapped as "{" + MagicChar + source + "}".
def unit_tests_for_attribute_lists
[
[ "", [], "Empty lists are allowed" ],
[ "=", :throw, "Bad char to begin a list with." ],
[ "a =b", :throw, "No whitespace before `=`." ],
[ "a= b", :throw, "No whitespace after `=`." ],
[ "a b", [[:ref, 'a'],[:ref, 'b']], "More than one ref" ],
[ "a b c", [[:ref, 'a'],[:ref, 'b'],[:ref, 'c']], "More than one ref" ],
[ "hello notfound", [[:ref, 'hello'],[:ref, 'notfound']]],
[ "'a'", [[:ref, 'a']], "Quoted value." ],
[ '"a"' ],
[ "a=b", [['a','b']], "Simple key/val" ],
[ "'a'=b" ],
[ "'a'='b'" ],
[ "a='b'" ],
[ 'a="b\'"', [['a',"b\'"]], "Key/val with quotes" ],
[ 'a=b\''],
[ 'a="\\\'b\'"', [['a',"\'b\'"]], "Key/val with quotes" ],
['"', :throw, "Unclosed quotes"],
["'"],
["'a "],
['"a '],
[ "#a", [[:id, 'a']], "Simple ID" ],
[ "#'a'" ],
[ '#"a"' ],
[ "#", :throw, "Unfinished '#'." ],
[ ".", :throw, "Unfinished '.'." ],
[ "# a", :throw, "No white-space after '#'." ],
[ ". a", :throw, "No white-space after '.' ." ],
[ "a=b c=d", [['a','b'],['c','d']], "Tabbing" ],
[ " \ta=b \tc='d' "],
[ "\t a=b\t c='d'\t\t"],
[ ".\"a'", :throw, "Mixing quotes is bad." ],
].map { |s, expected, comment|
# Inherit missing fields from the previous row; `last` is only set when
# the comment was inherited.
@expected = (expected ||= @expected)
@comment = (comment ||= (last=@comment) )
# An inherited comment gets a running number appended; a row with its own
# comment resets the counter to 1.
(comment == last && (comment += (@count+=1).to_s)) || @count = 1
# Array expectations are passed to md_ial and wrapped in a one-element array.
expected = [md_ial(expected)] if expected.kind_of? Array
["{#{MagicChar}#{s}}", expected, "Attributes: #{comment}"]
}
end
# Builds an AttributeList from +s+ (an empty array by default).
def md_al(s=[])
  AttributeList.new(s)
end
# Reads an attribute list from +src+.
#
# src::            character source; cur_char, ignore_char and
#                  consume_whitespace are called on it.
# con::            parsing context; only forwarded to the helpers and to the
#                  error reports.
# break_on_chars:: characters that end the list. The terminating character
#                  is not consumed.
#
# Always returns an AttributeList. When the input is malformed an error is
# reported through +maruku_error+ and the list holds what could be read.
def read_attribute_list(src, con, break_on_chars)
  separators = break_on_chars + [?=, ?\s, ?\t]
  escaped = Maruku::EscapedCharInQuotes

  al = AttributeList.new
  loop do
    src.consume_whitespace
    break if break_on_chars.include? src.cur_char

    case src.cur_char
    when nil
      maruku_error "Attribute list terminated by EOF:\n "+
        "#{al.inspect}" , src, con
      tell_user "I try to continue and return partial attribute list:\n"+
        al.inspect
      break
    when ?= # error
      maruku_error "In attribute lists, cannot start identifier with `=`.",
        src, con
      tell_user "I try to continue"
      src.ignore_char
    when ?# # id definition
      src.ignore_char
      if id = read_quoted_or_unquoted(src, con, escaped, separators)
        al.push_id id
      else
        maruku_error 'Could not read `id` attribute.', src, con
        tell_user 'Trying to ignore bad `id` attribute.'
      end
    when ?. # class definition
      src.ignore_char
      if klass = read_quoted_or_unquoted(src, con, escaped, separators)
        al.push_class klass
      else
        maruku_error 'Could not read `class` attribute.', src, con
        tell_user 'Trying to ignore bad `class` attribute.'
      end
    else
      if key = read_quoted_or_unquoted(src, con, escaped, separators)
        if src.cur_char == ?=
          src.ignore_char # skip the =
          if val = read_quoted_or_unquoted(src, con, escaped, separators)
            al.push_key_val(key, val)
          else
            maruku_error "Could not read value for key #{key.inspect}.",
              src, con
            tell_user "Ignoring key #{key.inspect}."
          end
        else
          al.push_ref key
        end
      else
        # NOTE(review): nothing is consumed on this path here; presumably
        # read_quoted_or_unquoted advances src when it fails -- TODO confirm,
        # otherwise the loop cannot make progress.
        maruku_error 'Could not read key or reference.', src, con
      end
    end # case
  end # loop
  al
end
# Attaches every inline attribute list (an MDElement whose node_type is
# :ial) found in +elements+ to a neighbouring element, then removes the
# IALs from the array.
#
# An IAL at index 1 or later is applied to the element before it when that
# is an MDElement, otherwise to the element after it; when neither is an
# MDElement an error is reported. An IAL at index 0 is neither applied nor
# removed. Nothing is removed when Globals[:debug_keep_ials] is set.
#
# +elements+ is modified in place; +src+ and +con+ are only used for error
# reports.
def merge_ial(elements, src, con)
  # We need a helper
  def is_ial(e); e.kind_of?(MDElement) && e.node_type == :ial end

  # Apply each IAL to the element before
  elements.each_with_index do |e, i|
    next unless is_ial(e) && i >= 1

    before = elements[i-1]
    after = elements[i+1]
    if before.kind_of? MDElement
      before.al = e.ial
    elsif after.kind_of? MDElement
      after.al = e.ial
    else
      maruku_error "It is not clear to me what element this IAL {:#{e.ial.to_md}} \n"+
        "is referring to. The element before is a #{before.class.to_s}, \n"+
        "the element after is a #{after.class.to_s}.\n"+
        "\n before: #{before.inspect}"+
        "\n after: #{after.inspect}",
        src, con
      # xxx say whether there is an empty element nearby
    end
  end

  elements.delete_if { |x| is_ial(x) unless x == elements.first } unless Globals[:debug_keep_ials]
end
end end end end
#module MaRuKu; module In; module Markdown; module SpanLevelParser

View file

@ -0,0 +1,65 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu
  # Library-wide default settings; the last place +get_setting+ looks.
  Globals = {
    :unsafe_features => false,
    :on_error => :warning,

    :use_numbered_headers => false,

    :maruku_signature => false,
    :code_background_color => '#fef',
    :code_show_spaces => false,
    :html_math_engine => 'itex2mml', #ritex, itex2mml, none
    :html_png_engine => 'none',
    :html_png_dir => 'pngs',
    :html_png_url => 'pngs/',
    :html_png_resolution => 200,

    :html_use_syntax => false,

    :latex_use_listings => false,
    :latex_cjk => false,

    :debug_keep_ials => false,
  }

  class MDElement
    # Looks up the setting +sym+: first in this element's attributes, then
    # in the attributes of its document (when there is one), then in
    # MaRuKu::Globals.
    #
    # When the key is found nowhere, a message is written to $stderr and
    # nil is returned.
    def get_setting(sym)
      return attributes[sym] if attributes.has_key?(sym)

      document = doc
      return document.attributes[sym] if document && document.attributes.has_key?(sym)

      return MaRuKu::Globals[sym] if MaRuKu::Globals.has_key?(sym)

      $stderr.puts "Bug: no default for #{sym.inspect}"
      nil
    end
  end
end

View file

@ -0,0 +1,92 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
#m Any method that detects formatting error calls the
#m maruku_error() method.
#m if @meta[:on_error] ==
#m
#m - :warning write on the standard err (or @error_stream if defined),
#m then do your best.
#m - :ignore be shy and try to continue
#m - :raise raises a MarukuException
#m
#m default is :warning
module MaRuKu
  # Raised by +maruku_error+ when the :on_error policy is :raise.
  class Exception < RuntimeError
  end

  module Errors
    # Reports the formatting error +s+ according to the :on_error setting:
    # :ignore does nothing, :raise raises a MaRuKu::Exception, :warning
    # writes a framed message through +tell_user+. Any other value of the
    # setting raises a RuntimeError.
    #
    # +src+ and +con+ are optional; when given, their +describe+ text is
    # appended to the message.
    def maruku_error(s,src=nil,con=nil)
      policy = get_setting(:on_error)
      if policy == :ignore
        nil
      elsif policy == :raise
        raise_error create_frame(describe_error(s,src,con))
      elsif policy == :warning
        tell_user create_frame(describe_error(s,src,con))
      else
        raise "BugBug: policy = #{policy.inspect}"
      end
    end

    # Writes the framed message whatever the :on_error setting is.
    def maruku_recover(s,src=nil,con=nil)
      tell_user create_frame(describe_error(s,src,con))
    end

    alias error maruku_error

    # Raises a MaRuKu::Exception carrying +s+ and the caller's backtrace.
    def raise_error(s)
      raise MaRuKu::Exception, s, caller
    end

    # Appends +s+ to attributes[:error_stream], or to $stderr when that
    # attribute is not set.
    def tell_user(s)
      (self.attributes[:error_stream] || $stderr) << s
    end

    # Draws a 75-column frame around +s+, followed by the first five
    # entries of the call stack.
    def create_frame(s)
      width = 75
      underline = "_" * width
      dashes = "+" + ("-" * width)
      rows = [
        "",
        " " + underline,
        "| Maruku tells you:",
        dashes,
        add_tabs(s,1,'| '),
        dashes,
        add_tabs(caller[0, 5].join("\n"),1,'!'),
        "\\" + underline
      ]
      rows.join("\n") + "\n"
    end

    # Returns +s+ followed by the descriptions of +src+ and +con+, for
    # whichever of the two is given.
    def describe_error(s,src,con)
      description = s
      description += "\n#{src.describe}\n" if src
      description += "\n#{con.describe}\n" if con
      description
    end
  end # Errors
end # MaRuKu

View file

@ -0,0 +1,11 @@
require 'maruku/ext/math/elements'
require 'maruku/ext/math/parsing'
require 'maruku/ext/math/to_latex'
require 'maruku/ext/math/to_html'
require 'maruku/ext/math/mathml_engines/none'
require 'maruku/ext/math/mathml_engines/ritex'
require 'maruku/ext/math/mathml_engines/itex2mml'
require 'maruku/ext/math/mathml_engines/blahtex'

View file

@ -0,0 +1,26 @@
module MaRuKu; class MDElement

  # Creates an :inline_math element whose meta holds the TeX source +math+.
  def md_inline_math(math)
    self.md_el(:inline_math, [], {:math=>math})
  end

  # Creates an :equation element for the TeX source +math+.
  #
  # A `\label{name}` found inside +math+ overrides +label+ and is stripped
  # from the stored source; the string passed by the caller is left
  # untouched. When the equation has a label and this element belongs to a
  # document (@doc), the equation receives the next number and is
  # registered in @doc.eqid2eq under its label.
  def md_equation(math, label=nil)
    reglabel= /\\label\{(\w+)\}/
    if math =~ reglabel
      label = $1
      # non-destructive: +math+ may be frozen or still in use by the caller
      math = math.gsub(reglabel,'')
    end
    # puts "Found label = #{label} math #{math.inspect} "
    numbered = label && @doc
    num = nil
    if numbered # take number
      @doc.eqid2eq ||= {}
      num = @doc.eqid2eq.size + 1
    end
    e = self.md_el(:equation, [], {:math=>math, :label=>label,:num=>num})
    @doc.eqid2eq[label] = e if numbered
    e
  end

end end

View file

@ -0,0 +1,108 @@
require 'tempfile'
require 'fileutils'
require 'digest/md5'
require 'pstore'
module MaRuKu; module Out; module HTML

  # Result of a PNG rendering: image URL, plus the depth and height read
  # from the blahtex output.
  PNG = Struct.new(:src,:depth,:height)

  # Renders +tex+ to a PNG image by running the external `blahtex` command.
  #
  # The XML printed by blahtex is stored in the directory
  # Globals[:html_png_dir], in a file named after the MD5 of the source;
  # when that file already exists the command is not run again.
  #
  # +kind+ is not used. Returns a PNG struct, or nil after reporting the
  # problem through +maruku_error+.
  def convert_to_png_blahtex(kind, tex)
    begin
      png_dir = MaRuKu::Globals[:html_png_dir]
      FileUtils::mkdir_p png_dir

      # first, we check whether this image has already been processed
      md5sum = Digest::MD5.hexdigest(tex+" params: ")
      result_file = File.join(png_dir, md5sum+".txt")

      if not File.exist?(result_file)
        tmp_in = Tempfile.new('maruku_blahtex')
        begin
          f = tmp_in.open
          f.write tex
          f.close

          resolution = get_setting(:html_png_resolution)

          options = "--png --use-preview-package --shell-dvipng 'dvipng -D #{resolution}' "
          options += ("--png-directory '%s'" % png_dir)

          cmd = "blahtex #{options} < #{tmp_in.path} > #{result_file}"
          $stderr.puts "$ #{cmd}"
          system cmd
        ensure
          tmp_in.delete
        end
      end

      result = File.read(result_file)

      doc = Document.new(result, {:respect_whitespace =>:all})
      png = doc.root.elements[1]
      if png.name != 'png'
        maruku_error "Blahtex error: \n#{doc}"
        return nil
      end
      depth = png.elements['depth'] || (raise "No depth element in:\n #{doc}")
      height = png.elements['height'] || (raise "No height element in:\n #{doc}")
      md5 = png.elements['md5'] || (raise "No md5 element in:\n #{doc}")

      depth = depth.text.to_f
      height = height.text.to_f # XXX check != 0
      md5 = md5.text

      dir_url = MaRuKu::Globals[:html_png_url]
      return PNG.new("#{dir_url}#{md5}.png", depth, height)
    # StandardError is named explicitly: a bare `Exception` written inside
    # module MaRuKu resolves to MaRuKu::Exception once that class is
    # defined, which would let every other error escape.
    rescue StandardError => e
      maruku_error "Error: #{e}"
    end
    nil
  end

  # Cache of blahtex MathML output, keyed by TeX source. The store file is
  # relative to the current working directory.
  BlahtexCache = PStore.new("blahtex_cache.pstore")

  # Renders +tex+ to MathML by running the external `blahtex` command; the
  # raw output is cached in BlahtexCache.
  #
  # +kind+ is not used. Returns the `mathml` element of the blahtex output,
  # or nil after reporting the problem through +maruku_error+.
  def convert_to_mathml_blahtex(kind, tex)
    begin
      blahtex = BlahtexCache.transaction do
        if BlahtexCache[tex].nil?
          tmp_in = Tempfile.new('maruku_blahtex')
          tmp_out = Tempfile.new('maruku_blahtex')
          begin
            f = tmp_in.open
            f.write tex
            f.close

            options = "--mathml"
            cmd = "blahtex #{options} < #{tmp_in.path} > #{tmp_out.path}"
            $stderr.puts "$ #{cmd}"
            system cmd

            BlahtexCache[tex] = File.read(tmp_out.path)
          ensure
            # both temporary files are removed, whatever happened
            tmp_in.delete
            tmp_out.delete
          end
        end
        BlahtexCache[tex]
      end

      doc = Document.new(blahtex, {:respect_whitespace =>:all})
      mathml = doc.root.elements['mathml']
      if not mathml
        maruku_error "Blahtex error: \n#{doc}"
        return nil
      end
      return mathml
    # See convert_to_png_blahtex for why StandardError is spelled out.
    rescue StandardError => e
      maruku_error "Error: #{e}"
    end
    nil
  end

end end end

View file

@ -0,0 +1,29 @@
module MaRuKu; module Out; module HTML

  # Renders +tex+ to MathML with the itex2MML library, which is loaded the
  # first time it is needed and kept in $itex2mml_parser.
  #
  # kind:: :equation (block filter) or :inline (inline filter).
  #
  # Returns the root element of the parsed MathML, or nil after reporting
  # the problem through +maruku_error+.
  def convert_to_mathml_itex2mml(kind, tex)
    begin
      if not $itex2mml_parser
        require 'itextomml'
        $itex2mml_parser = Itex2MML::Parser.new
      end

      itex_method = {:equation=>:block_filter,:inline=>:inline_filter}

      mathml = $itex2mml_parser.send(itex_method[kind], tex)
      doc = Document.new(mathml, {:respect_whitespace =>:all}).root
      return doc
    rescue LoadError => e
      maruku_error "Could not load package 'itex2mml'.\n"+
        "Please install it."
    rescue REXML::ParseException => e
      maruku_error "Invalid MathML TeX: \n#{add_tabs(tex,1,'tex>')}"+
        "\n\n #{e.inspect}"
    rescue => e
      # the exception has to be bound here for the report to show it
      maruku_error "Could not produce MathML TeX: \n#{tex}"+
        "\n\n #{e.inspect}"
    end
    nil
  end

end end end

View file

@ -0,0 +1,20 @@
module MaRuKu; module Out; module HTML

  # Fallback "engine": shows the TeX source itself inside a <code> element.
  #
  # +kind+ is not used. Returns the REXML element.
  def convert_to_mathml_none(kind, tex)
    # You can: either return a REXML::Element
    # return Element.new 'div'
    # or return an empty array on error
    # return []
    # or have a string parsed by REXML:
    # (`&` must be replaced first; `<` and `>` are common in TeX and would
    # otherwise be read as markup)
    escaped = tex.gsub('&','&amp;').gsub('<','&lt;').gsub('>','&gt;')
    mathml = "<code>#{escaped}</code>"
    return Document.new(mathml).root
  end

  # Fallback PNG "engine": produces no image.
  def convert_to_png_none(kind, tex)
    return nil
  end

end end end

View file

@ -0,0 +1,24 @@
module MaRuKu; module Out; module HTML

  # Renders +tex+ to MathML with the ritex library, which is loaded the
  # first time it is needed and kept in $ritex_parser.
  #
  # +kind+ is not used. Returns the root element of the parsed MathML, or
  # nil after reporting the problem through +maruku_error+.
  def convert_to_mathml_ritex(kind, tex)
    begin
      if not $ritex_parser
        require 'ritex'
        $ritex_parser = Ritex::Parser.new
      end

      mathml = $ritex_parser.parse(tex.strip)
      doc = Document.new(mathml, {:respect_whitespace =>:all}).root
      return doc
    rescue LoadError => e
      maruku_error "Could not load package 'ritex'.\n"+
        "Please install it using:\n"+
        " $ gem install ritex\n\n"+e.inspect
    rescue REXML::ParseException => e
      maruku_error "Invalid MathML TeX: \n#{add_tabs(tex,1,'tex>')}"+
        "\n\n #{e.inspect}"
    rescue StandardError => e
      # Parse errors raised by the parser end up here together with any
      # other failure. Racc::ParseError is deliberately not named in a
      # rescue clause: that constant may not be loaded.
      maruku_error "Could not parse TeX: \n#{tex}"+
        "\n\n #{e.inspect}"
    end
    nil
  end

end end end

View file

@ -0,0 +1,82 @@
module MaRuKu
  class MDDocument
    # Maps an equation id (String) to its equation element (MDElement).
    attr_reader :eqid2eq
    attr_writer :eqid2eq
  end
end
# At least one slash inside
#RegInlineMath1 = /\$([^\$]*[\\][^\$]*)\$/
# No spaces around the delimiters
#RegInlineMath2 = /\$([^\s\$](?:[^\$]*[^\s\$])?)\$/
#RegInlineMath = Regexp::union(RegInlineMath1,RegInlineMath2)
# Everything goes; takes care of escaping the "\$" inside the expression.
# The escaped-dollar alternative has to come first: otherwise the backslash
# is taken as an ordinary character and the dollar after it closes the
# expression.
RegInlineMath = /\$((?:\\\$|[^\$])+)\$/
# Span extension for inline math, triggered by `$`: when RegInlineMath can
# be read at the current position, an inline-math element built from the
# captured text is pushed onto the context and the handler reports success.
MaRuKu::In::Markdown.register_span_extension(:chars => ?$, :regexp => RegInlineMath) do |doc, src, con|
  matched = src.read_regexp(RegInlineMath)
  if matched
    con.push doc.md_inline_math(matched.captures.compact.first)
    true
  else
    #puts "not math: #{src.cur_chars 10}"
    false
  end
end
# First line of a display equation: up to three leading spaces, then `\[`
# or `$$`. Group 1 is the rest of the line.
EquationStart = /^[ ]{0,3}(?:\\\[|\$\$)(.*)$/
# An equation label written in parentheses, e.g. `(name)`. Group 1 is the name.
EqLabel = /(?:\((\w+)\))/
# A display equation opened and closed on one line, optionally followed by
# a label. Group 1 is the math, group 2 the label name (if present).
OneLineEquation = /^[ ]{0,3}(?:\\\[|\$\$)(.*)(?:\\\]|\$\$)\s*#{EqLabel}?\s*$/
# Last line of a multi-line equation: text, then `\]` or `$$`, optionally
# followed by a label. Group 1 is the text before the closing delimiter,
# group 2 the label name (if present).
EquationEnd = /^(.*)(?:\\\]|\$\$)\s*#{EqLabel}?\s*$/
# Block extension for display equations, triggered by a line matching
# EquationStart. A one-line equation is taken as it is; otherwise lines are
# consumed until one matches EquationEnd. The resulting equation element is
# pushed onto the context and the handler reports success.
#
# Errors are reported through the document (+doc+): the handler is a plain
# top-level block, so the helper methods have to be called on the document.
MaRuKu::In::Markdown.register_block_extension(:regexp => EquationStart) do |doc, src, con|
  # puts "Equation :#{self}"
  first = src.shift_line
  if first =~ OneLineEquation
    math = $1
    label = $2
    con.push doc.md_equation(math, label)
  else
    first =~ EquationStart
    math = $1
    label = nil
    while true
      if not src.cur_line
        # the input ended before the closing delimiter
        doc.maruku_error "Stream finished while reading equation\n\n"+
          doc.add_tabs(math,1,'$> '), src, con
        break
      end
      line = src.shift_line
      if line =~ EquationEnd
        math += $1 + "\n"
        label = $2 if $2
        break
      else
        math += line + "\n"
      end
    end
    con.push doc.md_equation(math, label)
  end
  true
end
# This adds support for \eqref

# LaTeX form, `\eqref{name}`; group 1 is the equation id.
RegEqrefLatex = /\\eqref\{(\w+)\}/
# Parenthesised form, `(eq:name)`; group 1 is the equation id.
RegEqPar = /\(eq:(\w+)\)/
# Either form. Only the group of the alternative that matched is set.
RegEqref = Regexp.union(RegEqrefLatex, RegEqPar)
# Span extension for equation references, triggered by `\` or `(`: when
# RegEqref can be read at the current position, an :eqref element carrying
# the equation id is pushed onto the context. Like the inline-math handler,
# it reports failure instead of raising when nothing could be read.
MaRuKu::In::Markdown.register_span_extension(:chars => [?\\, ?(], :regexp => RegEqref) do |doc, src, con|
  if (found = src.read_regexp(RegEqref))
    eqid = found.captures.compact.first
    con.push doc.md_el(:eqref, [], {:eqid=>eqid})
    true
  else
    false
  end
end

View file

@ -0,0 +1,170 @@
=begin maruku_doc
Extension: math
Attribute: html_math_engine
Scope: document, element
Output: html
Summary: Select the rendering engine for MathML.
Default: <?mrk Globals[:html_math_engine].to_s ?>
Select the rendering engine for math.
If you want to use your custom engine `foo`, then set:
HTML math engine: foo
{:lang=markdown}
and then implement two functions:
def convert_to_mathml_foo(kind, tex)
...
end
=end
=begin maruku_doc
Extension: math
Attribute: html_png_engine
Scope: document, element
Output: html
Summary: Select the rendering engine for PNG math output.
Default: <?mrk Globals[:html_png_engine].to_s ?>
Same thing as `html_math_engine`, only for PNG output.
def convert_to_png_foo(kind, tex)
# same thing
...
end
{:lang=ruby}
=end
module MaRuKu; module Out; module HTML
# Renders +tex+ as MathML with the engine named by the :html_math_engine
# setting, i.e. by calling the method convert_to_mathml_<engine>.
#
# Falls back to convert_to_mathml_none when that method is not defined
# (a notice is printed) or when it returns nil or false.
def render_mathml(kind, tex)
  engine = get_setting(:html_math_engine)
  converter = "convert_to_mathml_#{engine}".to_sym
  unless self.respond_to? converter
    puts "A method called #{converter} should be defined."
    return convert_to_mathml_none(kind, tex)
  end
  self.send(converter, kind, tex) || convert_to_mathml_none(kind, tex)
end
# Renders +tex+ as a PNG with the engine named by the :html_png_engine
# setting, i.e. by calling the method convert_to_png_<engine>.
#
# Returns whatever that method returns, or nil (after printing a notice)
# when the method is not defined.
def render_png(kind, tex)
  engine = get_setting(:html_png_engine)
  converter = "convert_to_png_#{engine}".to_sym
  unless self.respond_to? converter
    puts "A method called #{converter} should be defined."
    return nil
  end
  self.send(converter, kind, tex)
end
def pixels_per_ex
if not $pixels_per_ex
x = render_png(:inline, "x")
$pixels_per_ex = x.height # + x.depth
end
$pixels_per_ex
end
def adjust_png(png, use_depth)
src = png.src
height_in_px = png.height
depth_in_px = png.depth
height_in_ex = height_in_px / pixels_per_ex
depth_in_ex = depth_in_px / pixels_per_ex
total_height_in_ex = height_in_ex + depth_in_ex
style = ""
style += "vertical-align: -#{depth_in_ex}ex;" if use_depth
style += "height: #{total_height_in_ex}ex;"
img = Element.new 'img'
img.attributes['src'] = src
img.attributes['style'] = style
img.attributes['alt'] = "equation"
img
end
def to_html_inline_math
mathml = render_mathml(:inline, self.math)
png = render_png(:inline, self.math)
span = create_html_element 'span'
add_class_to(span, 'maruku-inline')
if mathml
add_class_to(mathml, 'maruku-mathml')
span << mathml
end
if png
img = adjust_png(png, use_depth=true)
add_class_to(img, 'maruku-png')
span << img
end
span
end
def to_html_equation
mathml = render_mathml(:equation, self.math)
png = render_png(:equation, self.math)
div = create_html_element 'div'
add_class_to(div, 'maruku-equation')
if self.label # then numerate
span = Element.new 'span'
span.attributes['class'] = 'maruku-eq-number'
num = self.num
span << Text.new("(#{num})")
div << span
div.attributes['id'] = "eq:#{self.label}"
end
if mathml
add_class_to(mathml, 'maruku-mathml')
div << mathml
end
if png
img = adjust_png(png, use_depth=false)
add_class_to(img, 'maruku-png')
div << img
end
source_div = Element.new 'div'
add_class_to(source_div, 'maruku-eq-tex')
code = convert_to_mathml_none(:equation, self.math)
code.attributes['style'] = 'display: none'
source_div << code
div << source_div
div
end
def to_html_eqref
if eq = self.doc.eqid2eq[self.eqid]
num = eq.num
a = Element.new 'a'
a.attributes['class'] = 'maruku-eqref'
a.attributes['href'] = "#eq:#{self.eqid}"
a << Text.new("(#{num})")
a
else
maruku_error "Cannot find equation #{self.eqid.inspect}"
Text.new "(eq:#{self.eqid})"
end
end
end end end

View file

@ -0,0 +1,21 @@
module MaRuKu; module Out; module Latex

  # Inline math: the stripped TeX source between dollar signs.
  def to_latex_inline_math
    tex = self.math.strip
    "$#{tex}$"
  end

  # Display math. Labelled equations use the numbered `equation`
  # environment with a \label; unlabelled ones use `displaymath`.
  def to_latex_equation
    tag = self.label
    unless tag
      return "\\begin{displaymath}\n#{self.math.strip}\n\\end{displaymath}\n"
    end
    label_cmd = "\\label{#{tag}}"
    "\\begin{equation}\n#{self.math.strip}\n#{label_cmd}\\end{equation}\n"
  end

  # Reference to a labelled equation.
  def to_latex_eqref
    "\\eqref{#{self.eqid}}"
  end

end end end

View file

@ -0,0 +1,259 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
# A series of helper functions for creating elements: they hide the
# particular internal representation.
#
# Please, always use these instead of creating MDElement.
#
module MaRuKu
  module Helpers

    # Generic element constructor; every md_* helper below goes through it.
    #
    # If the first child is an :ial element it is removed from +children+
    # (the array passed by the caller is modified) and its attribute list
    # is used as +al+, or appended to the +al+ that was given.
    # The new element's doc is set to @doc.
    def md_el(node_type, children=[], meta={}, al=nil)
      if (e=children.first).kind_of?(MDElement) and
        e.node_type == :ial then
        if al
          al += e.ial
        else
          al = e.ial
        end
        children.shift
      end
      e = MDElement.new(node_type, children, meta, al)
      e.doc = @doc
      return e
    end

    # Header of the given level.
    def md_header(level, children, al=nil)
      md_el(:header, children, {:level => level}, al)
    end

    # Inline code
    def md_code(code, al=nil)
      md_el(:inline_code, [], {:raw_code => code}, al)
    end

    # Code block
    def md_codeblock(source, al=nil)
      md_el(:code, [], {:raw_code => source}, al)
    end

    # Block quote
    def md_quote(children, al=nil)
      md_el(:quote, children, {}, al)
    end

    # List item; +want_my_par+ is stored as :want_my_paragraph.
    def md_li(children, want_my_par, al=nil)
      md_el(:li, children, {:want_my_paragraph=>want_my_par}, al)
    end

    # Footnote body
    def md_footnote(footnote_id, children, al=nil)
      md_el(:footnote, children, {:footnote_id=>footnote_id}, al)
    end

    # Abbreviation definition
    def md_abbr_def(abbr, text, al=nil)
      md_el(:abbr_def, [], {:abbr=>abbr, :text=>text}, al)
    end

    # Use of an abbreviation
    def md_abbr(abbr, title)
      md_el(:abbr, [abbr], {:title=>title})
    end

    # Raw HTML. The text is also parsed with REXML (wrapped in a
    # <marukuwrap> element) and the document is stored in the element's
    # @parsed_html; when parsing fails @parsed_html is simply left unset.
    #
    # +al+ is forwarded to the element like in every other helper
    # (it used to be accepted but silently dropped).
    def md_html(raw_html, al=nil)
      e = md_el(:raw_html, [], {:raw_html=>raw_html}, al)
      begin
        # remove newlines and whitespace at the beginning and at the
        # end of the string, or else REXML gets confused
        raw_html = raw_html.gsub(/\A\s*</,'<').
                            gsub(/>[\s\n]*\Z/,'>')
        raw_html = "<marukuwrap>#{raw_html}</marukuwrap>"
        e.instance_variable_set :@parsed_html,
          REXML::Document.new(raw_html)
      rescue
        # Deliberately best-effort: malformed HTML is kept as raw text.
        # tell_user "Malformed block of HTML:\n"+
        # add_tabs(raw_html,1,'|')
        # " #{raw_html.inspect}\n\n"+ex.inspect
      end
      e
    end

    # Reference-style link; the reference id is downcased.
    def md_link(children, ref_id, al=nil)
      md_el(:link, children, {:ref_id=>ref_id.downcase}, al)
    end

    # Immediate (inline) link
    def md_im_link(children, url, title=nil, al=nil)
      md_el(:im_link, children, {:url=>url,:title=>title}, al)
    end

    # Reference-style image
    def md_image(children, ref_id, al=nil)
      md_el(:image, children, {:ref_id=>ref_id}, al)
    end

    # Immediate (inline) image
    def md_im_image(children, url, title=nil, al=nil)
      md_el(:im_image, children, {:url=>url,:title=>title},al)
    end

    # Emphasis; +children+ may be a single element or an array.
    def md_em(children, al=nil)
      md_el(:emphasis, [children].flatten, {}, al)
    end

    def md_br()
      md_el(:linebreak, [], {}, nil)
    end

    def md_hrule()
      md_el(:hrule, [], {}, nil)
    end

    # Strong emphasis; +children+ may be a single element or an array.
    def md_strong(children, al=nil)
      md_el(:strong, [children].flatten, {}, al)
    end

    # Emphasis nested inside strong emphasis.
    def md_emstrong(children, al=nil)
      md_strong(md_em(children), al)
    end

    # <http://www.example.com/>
    def md_url(url, al=nil)
      md_el(:immediate_link, [], {:url=>url}, al)
    end

    # <andrea@rubyforge.org>
    # <mailto:andrea@rubyforge.org>
    def md_email(email, al=nil)
      md_el(:email_address, [], {:email=>email}, al)
    end

    def md_entity(entity_name, al=nil)
      md_el(:entity, [], {:entity_name=>entity_name}, al)
    end

    # Markdown extra
    def md_foot_ref(ref_id, al=nil)
      md_el(:footnote_reference, [], {:footnote_id=>ref_id}, al)
    end

    def md_par(children, al=nil)
      md_el(:paragraph, children, {}, al)
    end

    # [1]: http://url [properties]
    #
    # Note: :url, :ref_id and (when given) :title are written into the
    # +meta+ hash that was passed in.
    def md_ref_def(ref_id, url, title=nil, meta={}, al=nil)
      meta[:url] = url
      meta[:ref_id] = ref_id
      meta[:title] = title if title
      md_el(:ref_definition, [], meta, al)
    end

    # inline attribute list
    def md_ial(al)
      al = Maruku::AttributeList.new(al) if
        not al.kind_of?Maruku::AttributeList
      md_el(:ial, [], {:ial=>al})
    end

    # Attribute list definition
    def md_ald(id, al)
      md_el(:ald, [], {:ald_id=>id,:ald=>al})
    end

    # Server directive <?target code... ?>
    def md_xml_instr(target, code)
      md_el(:xml_instr, [], {:target=>target, :code=>code})
    end

  end
end
module MaRuKu

  class MDElement

    # Abbreviated description of this element, written as the md_*
    # helper call that would build it (meant to be eval()uable).
    # Returns nil for node types that have no short form.
    def inspect2
      short =
        case @node_type
        when :paragraph
          "md_par(%s)" % children_inspect
        when :footnote_reference
          "md_foot_ref(%s)" % self.footnote_id.inspect
        when :entity
          "md_entity(%s)" % self.entity_name.inspect
        when :email_address
          "md_email(%s)" % self.email.inspect
        when :inline_code
          "md_code(%s)" % self.raw_code.inspect
        when :raw_html
          "md_html(%s)" % self.raw_html.inspect
        when :emphasis
          "md_em(%s)" % children_inspect
        when :strong
          "md_strong(%s)" % children_inspect
        when :immediate_link
          "md_url(%s)" % self.url.inspect
        when :image
          "md_image(%s, %s)" % [children_inspect, self.ref_id.inspect]
        when :im_image
          "md_im_image(%s, %s, %s)" %
            [children_inspect, self.url.inspect, self.title.inspect]
        when :link
          "md_link(%s,%s)" % [children_inspect, self.ref_id.inspect]
        when :im_link
          "md_im_link(%s, %s, %s)" %
            [children_inspect, self.url.inspect, self.title.inspect]
        when :ref_definition
          "md_ref_def(%s, %s, %s)" %
            [self.ref_id.inspect, self.url.inspect, self.title.inspect]
        when :ial
          "md_ial(%s)" % self.ial.inspect
        else
          return nil
        end

      # A non-empty attribute list becomes one more trailing argument:
      # drop the closing parenthesis and re-close after it.
      short = short.chop + ", #{@al.inspect})" if @al and not @al.empty?
      short
    end

  end

end

View file

@ -0,0 +1,326 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu; module In; module Markdown; module SpanLevelParser
# a string scanner coded by me
class CharSourceManual; end
# a wrapper around StringScanner
class CharSourceStrscan; end
# A debug scanner that checks the correctness of both
# by comparing their output
class CharSourceDebug; end
# Choose!
CharSource = CharSourceManual # faster! 58ms vs. 65ms
#CharSource = CharSourceStrscan
#CharSource = CharSourceDebug
# Hand-written character source: a String plus an index into it.
class CharSourceManual
  include MaRuKu::Strings

  # s::      the String to scan (anything else raises)
  # parent:: optional enclosing source, only used by #describe
  def initialize(s, parent=nil)
    raise "Passed #{s.class}" if not s.kind_of? String
    @buffer = s
    @buffer_index = 0
    @parent = parent
  end

  # Return the current char, or nil at the end of the buffer.
  # (String#[] yields a Fixnum on Ruby 1.8 and a one-character
  # String on later versions.)
  def cur_char; @buffer[@buffer_index] end

  # Return the next n chars as a String.
  def cur_chars(n); @buffer[@buffer_index,n] end

  # Return the char after the current char (or nil).
  def next_char; @buffer[@buffer_index+1] end

  # Return the current char and advance past it.
  def shift_char
    c = @buffer[@buffer_index]
    @buffer_index+=1
    c
  end

  # Advance by one char; returns nil.
  def ignore_char
    @buffer_index+=1
    nil
  end

  # Advance by n chars; returns nil.
  def ignore_chars(n)
    @buffer_index+=n
    nil
  end

  # Everything from the current position to the end of the buffer.
  def current_remaining_buffer
    @buffer[@buffer_index, @buffer.size-@buffer_index]
  end

  # True if the upcoming chars are exactly +string+.
  def cur_chars_are(string)
    cur_chars(string.size) == string
  end

  # True if regexp +r+ matches at the current position.
  #
  # The pattern is anchored with \A: with ^ (which also matches after
  # every newline) a multi-line buffer could report a match found at
  # some later line instead of at the current position.
  def next_matches(r)
    r2 = /\A.{#{@buffer_index}}#{r}/m
    md = r2.match @buffer
    return !!md
  end

  # Like read_regexp, but matches against the whole buffer by skipping
  # the first @buffer_index chars inside the pattern.
  def read_regexp3(r)
    r2 = /\A.{#{@buffer_index}}#{r}/m
    m = r2.match @buffer
    if m
      consumed = m.to_s.size - @buffer_index
      ignore_chars consumed
    end
    m
  end

  # Matches +r+ at the current position. On success the matched text
  # is consumed and the MatchData returned; otherwise returns nil and
  # the position is unchanged.
  #
  # Anchored with \A rather than ^ so that a match starting after a
  # newline further on cannot be mistaken for one at the current
  # position (which would advance the index by the wrong amount).
  def read_regexp(r)
    r2 = /\A#{r}/
    rest = current_remaining_buffer
    m = r2.match(rest)
    if m
      @buffer_index += m.to_s.size
    end
    return m
  end

  # Skip spaces and tabs (newlines are not skipped).
  def consume_whitespace
    while c = cur_char
      # ?\s / ?\t compare correctly whether chars are Fixnums
      # (Ruby 1.8) or one-character Strings (later versions);
      # the former test `c == 32` never matched a String.
      if (c == ?\s || c == ?\t)
        ignore_char
      else
        break
      end
    end
  end

  # Append the run of ASCII letters at the current position to +out+
  # and advance past it.
  def read_text_chars(out)
    s = @buffer.size; c=nil
    while @buffer_index < s && (c=@buffer[@buffer_index]) &&
      ((c>=?a && c<=?z) || (c>=?A && c<=?Z))
      out << c
      @buffer_index += 1
    end
  end

  # Human-readable description of the current position, followed by
  # the parent's description when there is a parent.
  def describe
    s = describe_pos(@buffer, @buffer_index)
    if @parent
      s += "\n\n" + @parent.describe
    end
    s
  end

  include SpanLevelParser
end
# Builds a textual picture of position +buffer_index+ inside +buffer+:
# a window of at most 75 chars around the position (newlines shown as
# N, tabs as T, "EOF" appended when the window reaches the end), a
# marker line pointing at the position, and the whole buffer passed
# through add_tabs.
def describe_pos(buffer, buffer_index)
  width = 75
  total = buffer.size
  room_before = buffer_index
  room_after = total - buffer_index

  # Split the window around the position, letting one side use the
  # room the other side does not need.
  after = [width/2, room_after].min
  before = [room_before, width - after].min
  after = [room_after, width - before].min

  first = [buffer_index - before, 0].max
  last = [buffer_index + after, total].min
  span = last - first

  window = buffer[first, span].gsub("\n", 'N').gsub("\t", 'T')
  window += "EOF" if last == total

  caret = [buffer_index - first, 0].max
  trailing = [width - caret, 0].max
  padding = " " * caret

  header = [
    "-" * width,
    window,
    "-" * caret + "|" + "-" * trailing,
    padding + "+--- Byte #{buffer_index}",
    "Shown bytes [#{first} to #{span}] of #{total}:"
  ].join("\n")

  header + "\n" + add_tabs(buffer, 1, ">")
end
require 'strscan'
# Character source backed by StringScanner. It is meant to behave like
# CharSourceManual (CharSourceDebug runs the two side by side).
class CharSourceStrscan
  include SpanLevelParser
  include MaRuKu::Strings

  # s::      the String to scan
  # parent:: optional enclosing source (stored, not otherwise used here)
  def initialize(s, parent=nil)
    @s = StringScanner.new(s)
    @parent = parent
  end

  # Return the current char (or nil at the end of the input).
  def cur_char
    @s.peek(1)[0]
  end

  # Return the next n chars as a String.
  def cur_chars(n);
    @s.peek(n)
  end

  # Return the char after the current char (or nil).
  def next_char;
    @s.peek(2)[1]
  end

  # Return the current char and advance past it.
  # At the end of the input get_byte returns nil; return nil in that
  # case instead of failing on nil[0].
  def shift_char
    byte = @s.get_byte
    byte && byte[0]
  end

  # Advance by one char; returns nil.
  def ignore_char
    @s.get_byte
    nil
  end

  # Advance by n chars; returns nil.
  def ignore_chars(n)
    n.times do @s.get_byte end
    nil
  end

  # Everything from the current position to the end of the input.
  def current_remaining_buffer
    @s.rest #nil #@buffer[@buffer_index, @buffer.size-@buffer_index]
  end

  # True if the upcoming chars are exactly +string+.
  def cur_chars_are(string)
    cur_chars(string.size) == string
  end

  # True if regexp +r+ matches at the current position.
  def next_matches(r)
    len = @s.match?(r)
    return !!len
  end

  # Consumes +r+ at the current position and returns a MatchData for
  # the consumed text, or nil (position unchanged) when it does not match.
  def read_regexp(r)
    string = @s.scan(r)
    if string
      return r.match(string)
    else
      return nil
    end
  end

  # Skip spaces and tabs only, like CharSourceManual#consume_whitespace
  # does; /\s+/ would also swallow newlines and make the two
  # implementations disagree.
  def consume_whitespace
    @s.scan(/[ \t]+/)
    nil
  end

  # Human-readable description of the current position.
  def describe
    describe_pos(@s.string, @s.pos)
  end

end
# Debugging character source: every call is forwarded to both a
# CharSourceManual and a CharSourceStrscan. If their results differ,
# or they end up on different current chars, a report is printed
# and the process exits.
class CharSourceDebug

  def initialize(s, parent)
    @a = CharSourceManual.new(s, parent)
    @b = CharSourceStrscan.new(s, parent)
  end

  # Forwards +methodname+ to both sources and returns the result of the
  # first one once both have been checked against each other.
  def method_missing(methodname, *args)
    # Snapshots taken before the call, for the MatchData report.
    before_a = @a.describe
    before_b = @b.describe

    result_a = @a.send(methodname, *args)
    result_b = @b.send(methodname, *args)

    if result_a.kind_of? MatchData
      # MatchData objects are compared through their captures.
      unless result_a.to_a == result_b.to_a
        puts "called: #{methodname}(#{args})"
        puts "Matchdata:\na = #{result_a.to_a.inspect}\nb = #{result_b.to_a.inspect}"
        puts "AFTER: "+@a.describe
        puts "AFTER: "+@b.describe
        puts "BEFORE: "+before_a
        puts "BEFORE: "+before_b
        puts caller.join("\n")
        exit
      end
    elsif result_a != result_b
      puts "called: #{methodname}(#{args})"
      puts "Attenzione!\na = #{result_a.inspect}\nb = #{result_b.inspect}"
      puts ""+@a.describe
      puts ""+@b.describe
      puts caller.join("\n")
      exit
    end

    # Both sources must now be looking at the same char.
    unless @a.cur_char == @b.cur_char
      puts "Fuori sincronia dopo #{methodname}(#{args})"
      puts ""+@a.describe
      puts ""+@b.describe
      exit
    end

    return result_a
  end

end
end end end end

View file

@ -0,0 +1,68 @@
module MaRuKu; module In; module Markdown

  # Triggering character -> Array of SpanExtension
  SpanExtensionsTrigger = {}

  # A span-level extension: the characters that trigger it, the regexp
  # that must match at the current position, and the handler block.
  class SpanExtension
    attr_accessor :chars, :regexp, :block
  end

  # Hash String -> Extension
  SpanExtensions = {}

  # Hash Regexp -> Block
  BlockExtensions = {}

  # Registers a span extension.
  # args[:chars]::  one triggering char, or an array of them
  # args[:regexp]:: regexp that must match for the block to be called
  # The block is called as block.call(doc, src, con).
  def self.register_span_extension(args, &block)
    extension = SpanExtension.new
    extension.chars = [*args[:chars]]
    extension.regexp = args[:regexp]
    extension.block = block
    extension.chars.each do |char|
      SpanExtensionsTrigger[char] ||= []
      SpanExtensionsTrigger[char].push extension
    end
  end

  # Registers a block extension under args[:regexp].
  # The block is called as block.call(doc, src, con).
  def self.register_block_extension(args, &block)
    BlockExtensions[args[:regexp]] = block
  end

  # Runs the span extensions triggered by the current char of +src+
  # whose regexp matches. Returns true as soon as one handler returns
  # a true value, false otherwise.
  def check_span_extensions(src, con)
    candidates = SpanExtensionsTrigger[src.cur_char]
    return false unless candidates
    candidates.each do |extension|
      next unless extension.regexp && src.next_matches(extension.regexp)
      return true if extension.block.call(doc, src, con)
    end
    false # not special
  end

  # Runs the block extensions whose regexp matches +line+. Returns true
  # as soon as one handler returns a true value, false otherwise.
  def check_block_extensions(src, con, line)
    BlockExtensions.each_key do |pattern|
      next unless pattern.match(line)
      handler = BlockExtensions[pattern]
      return true if handler.call(doc, src, con)
    end
    false # not special
  end

  # Returns the MatchData of the first block extension regexp that
  # matches +line+, or false when none does.
  def any_matching_block_extension?(line)
    BlockExtensions.each_key do |pattern|
      found = pattern.match(line)
      return found if found
    end
    false
  end

end end end

View file

@ -0,0 +1,175 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu; module In; module Markdown; module SpanLevelParser
# This class helps me read and sanitize HTML blocks
# I tried to do this with REXML, but wasn't able to. (suggestions?)
#
# Text is fed in pieces through eat_this. The helper keeps a stack of
# open tags, remembers whether it stopped inside a comment or inside a
# tag split across pieces, and accumulates the accepted text, which is
# available through stuff_you_read. Malformed input raises (see error).
class HTMLHelper
include MaRuKu::Strings
# A complete tag at the start of a line: group 1 is "/" for a closing
# tag, group 2 the tag name, group 3 everything up to the next ">".
Tag = %r{^<(/)?(\w+)\s*([^>]*)>}m
# A "<" with no closing ">" (tried only after Tag failed to match).
PartialTag = %r{^<.*}m
# A run of text containing no "<".
EverythingElse = %r{^[^<]+}m
CommentStart = %r{^<!--}x
CommentEnd = %r{^.*-->}
# Tags that are always rewritten in self-closed form and never pushed
# on the tag stack.
TO_SANITIZE = ['img','hr']
# Input that has not been consumed yet.
attr_reader :rest
def initialize
@rest = ""
@tag_stack = []
@m = nil
@already = ""
self.state = :inside_element
end
attr_accessor :state # :inside_element, :inside_tag, :inside_comment,
# Feeds one more piece of text to the reader.
# The new text is placed in front of whatever is still in @rest, then
# the state machine runs until @rest is empty or until the reader is
# finished after having read at least one tag or comment start.
def eat_this(line)
@rest = line + @rest
things_read = 0
until @rest.empty?
case self.state
when :inside_comment
# Look for the end of the comment; without it the whole input is
# part of the comment and we stay in this state.
if @m = CommentEnd.match(@rest)
@already += @m.pre_match + @m.to_s
@rest = @m.post_match
self.state = :inside_element
else
@already += @rest
@rest = ""
self.state = :inside_comment
end
when :inside_element
# The order of the tests matters: comment start, complete tag,
# incomplete tag, then plain text.
if @m = CommentStart.match(@rest)
things_read += 1
@already += @m.pre_match + @m.to_s
@rest = @m.post_match
self.state = :inside_comment
elsif @m = Tag.match(@rest) then
things_read += 1
handle_tag
self.state = :inside_element
elsif @m = PartialTag.match(@rest) then
# The tag is not complete yet: set it aside in @partial_tag
# until the closing ">" shows up.
@already += @m.pre_match
@rest = @m.post_match
@partial_tag = @m.to_s
self.state = :inside_tag
elsif @m = EverythingElse.match(@rest)
@already += @m.pre_match + @m.to_s
@rest = @m.post_match
self.state = :inside_element
else
error "Malformed HTML: not complete: #{@rest.inspect}"
end
when :inside_tag
if @m = /^[^>]*>/.match(@rest) then
# The tag is now complete: push it back in front of @rest so
# that the :inside_element branch parses it as a whole.
@partial_tag += @m.to_s
@rest = @partial_tag + @m.post_match
@partial_tag = nil
self.state = :inside_element
else
@partial_tag += @rest
@rest = ""
self.state = :inside_tag
end
else
raise "Bug bug: state = #{self.state.inspect}"
end # not inside comment
# puts inspect
# puts "Read: #{@tag_stack.inspect}"
break if is_finished? and things_read>0
end
end
# Processes the tag just matched by Tag (in @m): appends it to the
# accepted text and updates the tag stack.
def handle_tag()
@already += @m.pre_match
@rest = @m.post_match
is_closing = !!@m[1]
tag = @m[2]
attributes = @m[3]
is_single = false
# A trailing "/" in the attributes marks a self-closed tag.
if attributes =~ /\A(.*)\/\Z/
attributes = $1
is_single = true
end
# puts "READ TAG #{@m.to_s.inspect} tag = #{tag} closing? #{is_closing} single = #{is_single}"
if TO_SANITIZE.include? tag
# Rewritten as <tag attrs /> whatever the original form was.
attributes.strip!
# puts "Attributes: #{attributes.inspect}"
if attributes.size > 0
@already += '<%s %s />' % [tag, attributes]
else
@already += '<%s />' % [tag]
end
elsif is_closing
@already += @m.to_s
# A closing tag must match the most recently opened tag.
if @tag_stack.empty?
error "Malformed: closing tag #{tag.inspect} "+
"in empty list"
end
if @tag_stack.last != tag
error "Malformed: tag <#{tag}> "+
"closes <#{@tag_stack.last}>"
end
@tag_stack.pop
else
@already += @m.to_s
@tag_stack.push(tag) unless is_single
end
end
# Raises an Exception (not a StandardError subclass, so a bare
# `rescue` does not catch it) carrying the message and the reader state.
def error(s)
raise Exception, "Error: #{s} \n"+ inspect, caller
end
# Multi-line dump of the reader: state, last match, tag stack,
# accepted text and remaining text.
def inspect; "HTML READER\n state=#{self.state} "+
"match=#{@m.to_s.inspect}\n"+
"Tag stack = #{@tag_stack.inspect} \n"+
"Before:\n"+
add_tabs(@already,1,'|')+"\n"+
"After:\n"+
add_tabs(@rest,1,'|')+"\n"
end
# The text accepted so far.
def stuff_you_read
@already
end
# True when no tag is open and the reader is not inside a comment
# or a partial tag.
def is_finished?
(self.state == :inside_element) and @tag_stack.empty?
end
end # html helper
end end end end

View file

@ -0,0 +1,110 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu; module In; module Markdown; module BlockLevelParser

  # A source of lines that can be consumed one at a time.
  #
  # It is the twin of CharSource.
  #
  class LineSource
    include MaRuKu::Strings

    # lines::         the array of lines to read from (must not be nil)
    # parent::        the LineSource these lines were taken from, if any
    # parent_offset:: index in the parent used to compute original
    #                 line numbers
    def initialize(lines, parent=nil, parent_offset=nil)
      raise "NIL lines? " if not lines
      @lines = lines
      @lines_index = 0
      @parent = parent
      @parent_offset = parent_offset
    end

    # The line at the current position (nil when exhausted).
    def cur_line
      @lines[@lines_index]
    end

    # The line after the current one (nil when there is none).
    def next_line
      @lines[@lines_index + 1]
    end

    # Returns the current line and moves to the next one.
    # Raises when there is nothing left to read.
    def shift_line
      raise "Over the rainbow" if @lines_index >= @lines.size
      consumed = @lines[@lines_index]
      @lines_index += 1
      consumed
    end

    # Moves to the next line without returning the current one.
    # Raises when there is nothing left to read.
    def ignore_line
      raise "Over the rainbow" if @lines_index >= @lines.size
      @lines_index += 1
    end

    # Describes the current position: its original line number and up
    # to three lines of context on each side, each with its md_type.
    def describe
      context = 3 # lines
      first = [@lines_index - context, 0].max
      last = [@lines_index + context, @lines.size - 1].min

      report = "At line #{original_line_number(@lines_index)}\n"
      (first..last).each do |i|
        marker = (i == @lines_index) ? '--> ' : ' '
        report += "%10s %4s|%s" % [@lines[i].md_type.to_s, marker, @lines[i]]
        report += "|\n"
      end
      report
    end

    # Line number of +index+; when there is a parent, +index+ is added
    # to the parent's line number for @parent_offset.
    def original_line_number(index)
      return 1 + index unless @parent
      index + @parent.original_line_number(@parent_offset)
    end

    def cur_index
      @lines_index
    end

    # Returns the types of the upcoming lines as a string, one letter
    # per line: t(ext), e(mpty), d(efinition), o(ther).
    # Stops after the first definition or after the second empty line.
    def tell_me_the_future
      forecast = ""
      empties = 0
      (@lines_index..@lines.size - 1).each do |i|
        letter =
          case @lines[i].md_type
          when :text
            "t"
          when :empty
            empties += 1
            "e"
          when :definition
            "d"
          else
            "o"
          end
        forecast += letter
        break if letter == "d" or empties > 1
      end
      forecast
    end

  end # linesource

end end end end # block

View file

@ -0,0 +1,596 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu; module In; module Markdown; module BlockLevelParser
include Helpers
include MaRuKu::Strings
include MaRuKu::In::Markdown::SpanLevelParser
# The list of block-level elements parsed so far.
class BlockContext < Array
  # Short description showing (at most) the last five elements.
  def describe
    shown = 5
    listing = last(shown).map { |element| "\n -" + element.inspect }
    "Last #{shown} elements: " + listing.join
  end
end
# Splits +text+ into lines, wraps them in a LineSource and returns
# the result of parse_blocks on it.
def parse_text_as_markdown(text)
  parse_blocks(LineSource.new(split_lines(text)))
end
# Parses all the lines of +src+ (a LineSource) into block-level elements.
#
# Each iteration looks at the md_type of the current line and hands the
# source to the matching read_* method, which consumes the lines it
# needs. Registered block extensions are tried first. Returns a
# BlockContext with the parsed elements, after attribute lists have
# been merged and list paragraphs simplified.
def parse_blocks(src)
output = BlockContext.new
# run state machine
while src.cur_line
# A block extension that handled the line takes precedence.
next if check_block_extensions(src, output, src.cur_line)
# Prints detected type (useful for debugging)
# puts "#{src.cur_line.md_type}|#{src.cur_line}"
case src.cur_line.md_type
when :empty;
# Empty lines are kept as :empty markers until the end.
output.push :empty
src.ignore_line
when :ial
m = InlineAttributeList.match src.shift_line
content = m[1] || ""
# puts "Content: #{content.inspect}"
src2 = CharSource.new(content, src)
interpret_extension(src2, output, [nil])
when :ald
output.push read_ald(src)
when :text
# A text line can start a table, a header underlined by the next
# line, a definition list or a plain paragraph.
# NOTE(review): src.next_line is nil on the last line; md_type is
# still called on it, so nil presumably responds to md_type - confirm.
if src.cur_line =~ MightBeTableHeader and
(src.next_line && src.next_line =~ TableSeparator)
output.push read_table(src)
elsif [:header1,:header2].include? src.next_line.md_type
output.push read_header12(src)
elsif eventually_comes_a_def_list(src)
definition = read_definition(src)
# Consecutive definitions are collected in the same list.
if output.last.kind_of?(MDElement) &&
output.last.node_type == :definition_list then
output.last.children << definition
else
output.push md_el(:definition_list, [definition])
end
else # Start of a paragraph
output.push read_paragraph(src)
end
when :header2, :hrule
# hrule
# (a :header2 line reaching this point is treated as a rule too)
src.shift_line
output.push md_hrule()
when :header3
output.push read_header3(src)
when :ulist, :olist
list_type = src.cur_line.md_type == :ulist ? :ul : :ol
li = read_list_item(src)
# append to current list if we have one
if output.last.kind_of?(MDElement) &&
output.last.node_type == list_type then
output.last.children << li
else
output.push md_el(list_type, [li])
end
when :quote; output.push read_quote(src)
when :code; e = read_code(src); output << e if e
when :raw_html; e = read_raw_html(src); output << e if e
when :footnote_text; output.push read_footnote_text(src)
when :ref_definition; read_ref_definition(src, output)
when :abbreviation; output.push read_abbreviation(src)
when :xml_instr; read_xml_instruction(src, output)
when :metadata;
# The old meta-data syntax is reported and the line skipped.
maruku_error "Please use the new meta-data syntax: \n"+
" http://maruku.rubyforge.org/proposal.html\n", src
src.ignore_line
else # warn if we forgot something
md_type = src.cur_line.md_type
line = src.cur_line
maruku_error "Ignoring line '#{line}' type = #{md_type}", src
src.shift_line
end
end
# Attribute lists are merged first; the :ial elements themselves are
# then dropped from the output.
merge_ial(output, src, output)
output.delete_if {|x| x.kind_of?(MDElement) &&
x.node_type == :ial}
# get rid of empty line markers
output.delete_if {|x| x == :empty}
# See for each list if we can omit the paragraphs and use li_span
# TODO: do this after
output.each do |c|
# Remove paragraphs that we can get rid of
if [:ul,:ol].include? c.node_type
# Only when no item of the list wants its paragraph: each item
# becomes a :li_span holding the children of its first child.
if c.children.all? {|li| !li.want_my_paragraph} then
c.children.each do |d|
d.node_type = :li_span
d.children = d.children[0].children
end
end
end
if c.node_type == :definition_list
# Same simplification for the definitions of a definition list.
if c.children.all?{|defi| !defi.want_my_paragraph} then
c.children.each do |definition|
definition.definitions.each do |dd|
dd.children = dd.children[0].children
end
end
end
end
end
output
end
# Reads an attribute list definition line, records the parsed list in
# self.ald under its id and returns the :ald element. A line that
# does not match AttributeDefinitionList is reported and nil returned.
def read_ald(src)
  line = src.shift_line
  unless line =~ AttributeDefinitionList
    maruku_error "Bug Bug:\n#{line.inspect}"
    return nil
  end

  id, raw_list = $1, $2
  al = read_attribute_list(CharSource.new(raw_list, src), nil, [nil])
  self.ald[id] = al
  md_ald(id, al)
end
# Reads a two-line header: a line of text followed by an underline
# (===== or -----). The type of the underline selects the level
# (:header2 gives level 2, anything else level 1).
def read_header12(src)
  title = src.shift_line.strip
  attributes = nil

  # With the new meta-data syntax a trailing {...} is an inline
  # attribute list and is not part of the title.
  if new_meta_data? and title =~ /^(.*)\{(.*)\}\s*$/
    title_part, raw_ial = $1, $2
    title = title_part.strip
    attributes = read_attribute_list(CharSource.new(raw_ial, src), nil, [nil])
  end

  spans = parse_lines_as_span [ title ]
  level = (src.cur_line.md_type == :header2) ? 2 : 1
  src.shift_line # consume the underline
  md_header(level, spans, attributes)
end
# Reads a one-line header such as '#### header ####'; the level is
# the number of leading hashes.
def read_header3(src)
  heading = src.shift_line.strip
  attributes = nil

  # With the new meta-data syntax a trailing {...} is an inline
  # attribute list and is not part of the heading.
  if new_meta_data? and heading =~ /^(.*)\{(.*)\}\s*$/
    heading_part, raw_ial = $1, $2
    heading = heading_part.strip
    attributes = read_attribute_list(CharSource.new(raw_ial, src), nil, [nil])
  end

  level = num_leading_hashes(heading)
  spans = parse_lines_as_span [strip_hashes(heading)]
  md_header(level, spans, attributes)
end
# Reads an XML processing instruction (<?target code ... ?>), which
# may span several lines, and pushes the result on +output+.
#
# Instructions whose target is 'mrk' are executed when
# MaRuKu::Globals[:unsafe_features] is set, and the elements they
# return are pushed; anything else becomes an :xml_instr element.
#
# An instruction that is never closed with "?>" is reported through
# maruku_error and read up to the end of the source, instead of
# failing when the source runs out of lines.
def read_xml_instruction(src, output)
  m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
  raise "BugBug" if not m
  target = m[2] || ''
  code = m[3]

  # Keep adding lines until the closing "?>" shows up.
  until code =~ /\?>/
    if not src.cur_line
      maruku_error "Stream finished while reading XML instruction:\n"+
        add_tabs(code, 1, '|'), src
      break
    end
    code += "\n"+src.shift_line
  end

  # Anything after the closing "?>" is reported as garbage
  # (only meaningful when a "?>" was actually found).
  if code =~ /\?>/ && code !~ /\?>\s*$/
    garbage = (/\?>(.*)$/.match(code))[1]
    maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n"+
      add_tabs(code, 1, '|'), src
  end
  code.gsub!(/\?>\s*$/, '')

  if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
    result = safe_execute_code(self, code)
    if result
      if result.kind_of? String
        raise "Not expected"
      else
        output.push *result
      end
    end
  else
    output.push md_xml_instr(target, code)
  end
end
# Reads a block of raw HTML: lines are fed to an HTMLHelper until it
# reports that the block is finished or the source runs out.
# Returns a :raw_html element built from whatever the helper accepted.
def read_raw_html(src)
  reader = HTMLHelper.new
  begin
    reader.eat_this(src.shift_line)
    until !src.cur_line || reader.is_finished?
      reader.eat_this("\n" + src.shift_line)
    end
  rescue Exception => e
    # Exception (not just StandardError) is rescued on purpose, so
    # that every failure raised while reading is reported here.
    trace = e.inspect + e.backtrace.join("\n")
    maruku_error "Bad block-level HTML:\n#{add_tabs(trace,1,'|')}\n", src
  end
  md_html(reader.stuff_you_read)
end
# Collects the lines of a paragraph and returns a :paragraph element
# built from their span-level parse.
#
# The paragraph ends before a line of a block type listed below, a
# list line followed by another line of the same list type, a blank
# line, a line followed by a header underline, or a line matching a
# block extension.
def read_paragraph(src)
  enders = [:quote, :header3, :empty, :raw_html, :ref_definition, :ial, :xml_instr]
  collected = []

  while (current = src.cur_line)
    kind = current.md_type
    break if enders.include?(kind)
    # a single list-like line does not break the paragraph
    break if [:olist, :ulist].include?(kind) && src.next_line.md_type == kind
    break if current.strip.size == 0
    break if [:header1,:header2].include? src.next_line.md_type
    break if any_matching_block_extension?(current)
    collected << src.shift_line
  end

  md_par(parse_lines_as_span(collected, src))
end
# Reads one list item, either ordered or unordered, and returns the
# :li element. The item's lines are parsed as blocks in a nested
# LineSource.
def read_list_item(src)
  offset_in_parent = src.cur_index
  kind = src.cur_line.md_type
  head = src.shift_line

  # Ugly things going on inside `read_indented_content`
  indent = spaces_before_first_char(head)
  body, wants_paragraph =
    read_indented_content(src, indent, [:ulist, :olist, :ial], kind)

  # The first line goes in front, without its list marker
  # ('*', '-', '+', ...) and the indentation that follows it.
  body.unshift head[indent, head.size-1]

  children = parse_blocks(LineSource.new(body, src, offset_in_parent))
  md_li(children, wants_paragraph || (children.size>1))
end
# Reads an abbreviation definition line, records it in
# self.abbreviations and returns the :abbr_def element.
# A line that does not match, or an empty abbreviation, is reported
# through maruku_error.
def read_abbreviation(src)
  line = src.shift_line
  unless line =~ Abbreviation
    maruku_error "Bug: it's Andrea's fault. Tell him.\n#{line.inspect}"
  end
  abbr, desc = $1, $2

  if abbr.nil? || abbr.size == 0
    maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
  end

  self.abbreviations[abbr] = desc
  md_abbr_def(abbr, desc)
end
# Reads the text of a footnote, records the resulting :footnote
# element in self.footnotes under its id and returns it. The
# footnote body is parsed as blocks in a nested LineSource.
def read_footnote_text(src)
  offset_in_parent = src.cur_index
  head = src.shift_line

  maruku_error "Bug (it's Andrea's fault)" unless head =~ FootnoteText
  id, text = $1, $2

  # Ugly things going on inside `read_indented_content`
  # Continuation lines are expected to be indented by 4.
  body, _wants_paragraph =
    read_indented_content(src, 4, [:footnote_text], :footnote_text)

  # The text on the first line, if any, goes in front.
  body.unshift text if text && text.strip != ""

  children = parse_blocks(LineSource.new(body, src, offset_in_parent))
  footnote = md_footnote(id, children)
  self.footnotes[id] = footnote
  footnote
end
# This is the only ugly function in the code base.
# It is used to read list items, descriptions, footnote text
#
# Collects the lines following the first line of an item.
# src::         the source to read from
# indentation:: number of leading columns stripped from each line
# break_list::  line types that end the item (only checked while no
#               empty line has been seen)
# item_type::   the md_type of the kind of item being read
#
# Returns [lines, want_my_paragraph]. Trailing empty lines are removed
# from +lines+ (but they have been consumed from +src+).
def read_indented_content(src, indentation, break_list, item_type)
lines =[]
# collect all indented lines
saw_empty = false; saw_anything_after = false
while src.cur_line
#puts "#{src.cur_line.md_type} #{src.cur_line.inspect}"
# Empty lines are always accepted; they only change how the
# following lines are treated.
if src.cur_line.md_type == :empty
saw_empty = true
lines << src.shift_line
next
end
# after a white line
if saw_empty
# we expect things to be properly aligned
if (ns=number_of_leading_spaces(src.cur_line)) < indentation
#puts "breaking for spaces, only #{ns}: #{src.cur_line}"
break
end
saw_anything_after = true
else
break if break_list.include? src.cur_line.md_type
# break if src.cur_line.md_type != :text
end
stripped = strip_indent(src.shift_line, indentation)
lines << stripped
#puts "Accepted as #{stripped.inspect}"
# You are only required to indent the first line of
# a child paragraph.
if stripped.md_type == :text
while src.cur_line && (src.cur_line.md_type == :text)
lines << strip_indent(src.shift_line, indentation)
end
end
end
# The item wants its own paragraph if content followed an empty line,
# or if an empty line is directly followed by another item of the
# same type.
want_my_paragraph = saw_anything_after ||
(saw_empty && (src.cur_line && (src.cur_line.md_type == item_type)))
# dbg_describe_ary(lines, 'LI')
# create a new context
while lines.last && (lines.last.md_type == :empty)
lines.pop
end
return lines, want_my_paragraph
end
# Reads a run of consecutive :quote lines from +src+, removes the quote
# marker from each (unquote), parses the result as nested blocks and
# returns it wrapped by md_quote.
def read_quote(src)
  start_index = src.cur_index
  unquoted = []
  loop do
    line = src.cur_line
    break unless line && line.md_type == :quote
    unquoted << unquote(src.shift_line)
  end
  # the nested source remembers where it started inside the parent source
  inner = LineSource.new(unquoted, src, start_index)
  md_quote(parse_blocks(inner))
end
# Reads an indented code block: every consecutive :code or :empty line is
# taken with 4 columns of indentation stripped. Blank lines at both ends
# are discarded. Returns nil when nothing but blank lines was read,
# otherwise the md_codeblock built from the lines joined with "\n".
def read_code(src)
  body = []
  while src.cur_line && [:code, :empty].include?(src.cur_line.md_type)
    body << strip_indent(src.shift_line, 4)
  end
  body.pop   while body.last  && body.last.strip.empty?
  body.shift while body.first && body.first.strip.empty?
  return nil if body.empty?
  md_codeblock(body.join("\n"))
end
# Reads a series of metadata lines with empty lines in between.
# Empty lines are skipped, each :metadata line is parsed with
# parse_metadata and merged into one Hash; the first line of any other
# type stops the scan and is left in +src+. Returns the merged Hash.
def read_metadata(src)
  collected = {}
  while (line = src.cur_line)
    kind = line.md_type
    if kind == :empty
      src.shift_line
    elsif kind == :metadata
      collected.merge!(parse_metadata(src.shift_line))
    else
      break
    end
  end
  collected
end
# Reads one link reference definition from +src+, stores it in self.refs
# under the normalised id (stripped, downcased, spaces turned into "_")
# and pushes the matching md_ref_def element onto +out+.
#
# If the following line is not itself a :ref_definition and is indented by
# 1 to 3 spaces, it is treated as the continuation of this definition.
#
# Anything captured by LinkRegex after the title is read as whitespace
# separated key=value couples and added to the stored hash. Only the
# first "=" separates key from value, so values may themselves contain
# "="; couples without a key are ignored.
#
# Reports through maruku_error and returns nil when the line does not
# match LinkRegex.
def read_ref_definition(src, out)
  line = src.shift_line
  # if link is incomplete, shift next line
  if src.cur_line && (src.cur_line.md_type != :ref_definition) &&
     [1, 2, 3].include?(number_of_leading_spaces(src.cur_line))
    line += " " + src.shift_line
  end

  match = LinkRegex.match(line)
  if not match
    maruku_error "Link does not respect format: '#{line}'"
    return
  end

  id    = match[1].strip.downcase.gsub(' ', '_')
  url   = match[2]
  title = match[3]
  hash = self.refs[id] = {:url => url, :title => title}

  stuff = match[4]
  if stuff
    stuff.split.each do |couple|
      # limit 2: keep any further "=" inside the value
      k, v = couple.split('=', 2)
      # "=" or "=value" carries no usable key
      next if k.nil? || k.empty?
      v ||= ""
      # strip the surrounding quotes; a lone quote yields an empty value
      if v[0, 1] == '"' then v = v[1, v.size - 2] || "" end
      hash[k.to_sym] = v
    end
  end

  out.push md_ref_def(id, url, {:title => title})
end
# Splits one table line on "|" and returns the stripped, non-empty cells.
def split_cells(s)
  s.strip.split('|').map { |x| x.strip }.select { |x| x.size > 0 }
end

# Reads a table from +src+: one header line, one separator line, then
# every following line that contains a "|".
#
# The separator line fixes the number of columns and, through the two
# captures of Sep, the alignment of each column (:left, :right, :center).
# If the header or any body row has a different number of cells, the
# problem is reported and md_br() is returned instead of a table.
#
# Returns a :table element whose children are the header cells followed by
# the body cells, with the alignments stored under :align.
def read_table(src)
  head = split_cells(src.shift_line).map { |s| md_el(:head_cell, parse_lines_as_span([s])) }

  separator = split_cells(src.shift_line)
  align = separator.map do |s|
    m = Sep.match(s)
    if m && m[1] && m[2] then :center elsif m && m[2] then :right else :left end
  end

  num_columns = align.size
  if head.size != num_columns
    maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
    tell_user "I will ignore this table."
    # XXX try to recover
    return md_br()
  end

  rows = []
  while src.cur_line && src.cur_line =~ /\|/
    row = split_cells(src.shift_line).map { |s| md_el(:cell, parse_lines_as_span([s])) }
    # check the row just read (the header was already validated above)
    if row.size != num_columns
      maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}"
      tell_user "I will ignore this table."
      # XXX try to recover
      return md_br()
    end
    rows << row
  end

  children = (head + rows).flatten
  return md_el(:table, children, {:align => align})
end
# If current line is text, a definition list is coming
# if 1) text,empty,[text,empty]*,definition
#
# The check is done on the string returned by src.tell_me_the_future: it
# must start with one or more "t", an optional "e", then a "d"
# (presumably one letter per upcoming line type; see tell_me_the_future).
# Returns the match position (0) or nil.
def eventually_comes_a_def_list(src)
  upcoming = src.tell_me_the_future
  upcoming =~ /^t+e?d/
end
# Reads one definition-list entry: one or more :text lines (the terms),
# one optional empty line, then one or more :definition lines, each with
# its indented continuation read by read_indented_content.
#
# Raises a RuntimeError ("Chunky Bacon!") if the input ends after the
# terms or if no :definition line follows them.
#
# Returns a :definition element whose children are the terms followed by
# the definitions; :want_my_paragraph is true when an empty line separated
# terms and definitions or when any definition body asked for it.
def read_definition(src)
  # Read one or more terms
  terms = []
  while src.cur_line && src.cur_line.md_type == :text
    term_spans = parse_lines_as_span([src.shift_line])
    terms << md_el(:definition_term, term_spans)
  end

  raise "Chunky Bacon!" unless src.cur_line
  # one optional empty
  blank_before_definitions = (src.cur_line.md_type == :empty)
  src.shift_line if blank_before_definitions
  want_my_paragraph = blank_before_definitions
  raise "Chunky Bacon!" unless src.cur_line.md_type == :definition

  # Read one or more definitions
  definitions = []
  while src.cur_line && src.cur_line.md_type == :definition
    offset = src.cur_index
    captured = Definition.match(src.shift_line)
    # text of the definition line itself (first capture of Definition)
    first_line = captured && captured[1]
    body, wants_paragraph =
      read_indented_content(src, 4, [:definition], :definition)
    want_my_paragraph ||= wants_paragraph
    body.unshift first_line
    children = parse_blocks(LineSource.new(body, src, offset))
    definitions << md_el(:definition_data, children)
  end

  md_el(:definition, terms + definitions, {
    :terms => terms,
    :definitions => definitions,
    :want_my_paragraph => want_my_paragraph})
end
end # BlockLevelParser
end # MaRuKu
end
end

View file

@ -0,0 +1,226 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'iconv'
module MaRuKu; module In; module Markdown; module BlockLevelParser
# Parses the whole document text +s+ into this document object.
#
# Steps, in order:
# 1. parse_email_headers splits +s+ into header attributes and the body
#    (returned under :data); the headers are merged into self.attributes.
# 2. If an :encoding attribute other than utf-8 is given, the body is
#    converted to UTF-8 with Iconv. The attribute is then removed.
# 3. The body is parsed as Markdown into @children.
# 4. Abbreviations and Markdown inside raw HTML are processed.
# 5. The table of contents is built; if no :title attribute was set, the
#    TOC's header element (when present) provides it.
# 6. Attribute lists are expanded into every element's attributes.
# 7. If Maruku::Globals[:unsafe_features] is set, embedded code is run.
def parse_doc(s)
meta2 = parse_email_headers(s)
data = meta2[:data]
meta2.delete :data
self.attributes.merge! meta2
=begin maruku_doc
Attribute: encoding
Scope: document
Summary: Encoding for the document.
If the `encoding` attribute is specified, then the content
will be converted from the specified encoding to UTF-8.
Conversion happens using the `iconv` library.
=end
enc = self.attributes[:encoding]
self.attributes.delete :encoding
if enc && enc.downcase != 'utf-8'
converted = Iconv.new('utf-8', enc).iconv(data)
# puts "Data: #{data.inspect}: #{data}"
# puts "Conv: #{converted.inspect}: #{converted}"
data = converted
end
@children = parse_text_as_markdown(data)
# the markdown_extra? switch is currently hard-wired to true
if true #markdown_extra?
self.search_abbreviations
self.substitute_markdown_inside_raw_html
end
toc = create_toc
# use title if not set
if not self.attributes[:title] and toc.header_element
title = toc.header_element.to_s
self.attributes[:title] = title
# puts "Set document title to #{title}"
end
# save for later use
self.toc = toc
# Now do the attributes magic
each_element do |e|
# default attribute list
# (looked up in self.ald under the element's node type name)
if default = self.ald[e.node_type.to_s]
expand_attribute_list(default, e.attributes)
end
# the element's own attribute list is applied after the default one
expand_attribute_list(e.al, e.attributes)
# puts "#{e.node_type}: #{e.attributes.inspect}"
end
=begin maruku_doc
Attribute: unsafe_features
Scope: global
Summary: Enables execution of XML instructions.
Disabled by default because of security concerns.
=end
if Maruku::Globals[:unsafe_features]
self.execute_code_blocks
# TODO: remove executed code blocks
end
end
# Expands an attribute list in an Hash
#
# al::     list of [key, value] couples
# result:: Hash that receives the expanded attributes (modified in place)
#
# Rules per key:
# :class:: values are accumulated, separated by one space
# :id::    the last value wins
# :ref::   if self.ald has a list under that label, the list is expanded
#          recursively (labels already expanded are remembered in
#          result[:expanded_references]; meeting one again is reported as
#          a circular reference). Otherwise the label is recorded in
#          result[:unresolved_references] (space separated) and also set
#          as a boolean attribute.
# other::  stored under key.to_sym
#
# Neither +al+ nor the strings it contains are modified.
def expand_attribute_list(al, result)
  al.each do |k, v|
    case k
    when :class
      if not result[:class]
        result[:class] = v
      else
        result[:class] += " " + v
      end
    when :id; result[:id] = v
    when :ref
      if self.ald[v]
        already = (result[:expanded_references] ||= [])
        if not already.include?(v)
          already.push v
          expand_attribute_list(self.ald[v], result)
        else
          already.push v
          maruku_error "Circular reference between labels.\n\n"+
            "Label #{v.inspect} calls itself via recursion.\nThe recursion is "+
            (already.map{|x| x.inspect}.join(' => '))
        end
      else
        # Build a new string instead of appending with <<: the first label
        # stored here is the very object held by +al+, and appending in
        # place would rewrite the caller's attribute list.
        pending = result[:unresolved_references]
        result[:unresolved_references] = pending ? "#{pending} #{v}" : v
        # $stderr.puts "Unresolved reference #{v.inspect} (avail: #{self.ald.keys.inspect})"
        result[v.to_sym] = true
      end
    else
      result[k.to_sym] = v
    end
  end
end
# Evaluates the string +code+ in the context of +object+ (instance_eval)
# and returns its value.
#
# Errors raised by the evaluated code -- including syntax errors, which
# are ScriptError and not StandardError, and runaway recursion -- are
# reported through maruku_error together with the code and the backtrace,
# and nil is returned. Process-level exceptions such as Interrupt or
# SystemExit are not intercepted.
#
# NOTE(review): this runs arbitrary Ruby taken from the document; it must
# only be reachable when the caller has explicitly enabled it.
def safe_execute_code(object, code)
  begin
    return object.instance_eval(code)
  rescue StandardError, ScriptError, SystemStackError => e
    # Exception#backtrace holds the trace (it may be nil for exceptions
    # that were never raised); Kernel#caller is private and unrelated.
    trace = (e.backtrace || []).join("\n")
    maruku_error "Exception while executing this:\n"+
      add_tabs(code, 1, ">")+
      "\nThe error was:\n"+
      add_tabs(e.inspect+"\n"+trace, 1, "|")
  end
  nil
end
# Runs, through safe_execute_code, the code of every :xml_instr element
# whose target is 'maruku'. The code is evaluated in the context of the
# element itself; String results are printed on standard output.
def execute_code_blocks
  self.each_element(:xml_instr) do |instr|
    next unless instr.target == 'maruku'
    outcome = safe_execute_code(instr, instr.code)
    puts "Result is : #{outcome.inspect}" if outcome.kind_of?(String)
  end
end
# For every abbreviation registered in self.abbreviations, replaces its
# first literal occurrence in each string of the document (as visited by
# replace_each_string) with an md_abbr element, keeping the text before
# and after the match. Strings without a match are returned unchanged.
def search_abbreviations
  self.abbreviations.each do |short_form, expansion|
    pattern = Regexp.new(Regexp.escape(short_form))
    self.replace_each_string do |text|
      found = pattern.match(text)
      next text unless found
      # the element gets its own copies of the abbreviation and its title
      label = expansion ? expansion.dup : nil
      [found.pre_match, md_abbr(short_form.dup, label), found.post_match]
    end
  end
end
include REXML
# (PHP Markdown extra) Search for elements that have
# markdown=1 or markdown=block defined
#
# For every :raw_html element whose HTML was parsed successfully (the
# parsed tree is read from its @parsed_html instance variable), each HTML
# element carrying a 'markdown' attribute has the attribute removed and
# its non-blank text nodes replaced, in place, by the HTML produced from
# parsing that text as Markdown. The text is parsed at block level when
# the attribute value is 'block' or the tag is listed in block_tags,
# otherwise at span level.
def substitute_markdown_inside_raw_html
self.each_element(:raw_html) do |e|
doc = e.instance_variable_get :@parsed_html
if doc # valid html
# parse block-level markdown elements in these HTML tags
block_tags = ['div']
# use xpath to find elements with 'markdown' attribute
# NOTE(review): the block parameter below reuses the name `e` of the
# enclosing block's parameter.
XPath.match(doc, "//*[attribute::markdown]" ).each do |e|
# puts "Found #{e}"
# should we parse block-level or span-level?
# NOTE(review): this local has the same name as the parse_blocks method.
parse_blocks = (e.attributes['markdown'] == 'block') ||
block_tags.include?(e.name)
# remove 'markdown' attribute
e.delete_attribute 'markdown'
# Select all text elements of e
# NOTE(review): "//text()" is an absolute path; it may select text nodes
# of the whole parsed tree rather than only those under `e` -- confirm
# against REXML's XPath behaviour (".//text()" would be the relative form).
XPath.match(e, "//text()" ).each { |original_text|
s = original_text.value.strip
if s.size > 0
el = md_el(:dummy,
parse_blocks ? parse_text_as_markdown(s) :
parse_lines_as_span([s]) )
p = original_text.parent
# insert the generated nodes before the text node, then drop the text node
el.children_to_html.each do |x|
p.insert_before(original_text, x)
end
p.delete(original_text)
end
}
end
end
end
end
end end end end

View file

@ -0,0 +1,704 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'set'
module MaRuKu; module In; module Markdown; module SpanLevelParser
include MaRuKu::Helpers
# Characters that a backslash escapes in ordinary span text
# (this set is what parse_span_better hands to read_span).
EscapedCharInText =
Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>]
# Same characters plus the two quote characters; used by read_quoted
# when reading a quoted value.
EscapedCharInQuotes =
Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>,?',?"]
# NOTE(review): not referenced in the code visible here -- presumably
# meant for inline code spans; confirm before relying on it.
EscapedCharInInlineCode = [?\\,?`]
# Joins +lines+ with newlines and parses the result as span-level
# Markdown (see parse_span_better). +parent+ is passed through unchanged.
def parse_lines_as_span(lines, parent=nil)
  joined = lines.join("\n")
  parse_span_better(joined, parent)
end
# Parses +string+ as span-level Markdown and returns what read_span
# returns. The parser works on a frozen copy, so the caller's string is
# neither frozen nor modified. A non-String argument is reported through
# +error+.
def parse_span_better(string, parent=nil)
  error "Passed #{string.class}." unless string.kind_of?(String)
  frozen_copy = (string + "").freeze
  # [nil]: the only exit character is the end of input
  read_span(CharSource.new(frozen_copy, parent), EscapedCharInText, [nil])
end
# This is the main loop for reading span elements
#
# It's long, but not *complex* or difficult to understand.
#
# src::             character source (cur_char, next_char, shift_char, ...)
# escaped::         characters that a backslash escapes literally
# exit_on_chars::   reading stops, without consuming, when the current
#                   character is one of these (nil stands for end of
#                   input); letters a-z / A-Z cannot be used here because
#                   of the fast path below
# exit_on_strings:: reading stops when the input continues with one of
#                   these strings
#
# Returns the list of Strings and elements read, after inline attribute
# lists were merged (merge_ial), one leading and one trailing space were
# removed, and the result went through +educate+.
#
# Space character literals are written ?\s so that they do not depend on
# invisible trailing whitespace in this file.
def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
  con = SpanContext.new
  c = d = nil
  while true
    c = src.cur_char

    # This is only an optimization which cuts 50% of the time used.
    # (but you can't use a-zA-z in exit_on_chars)
    if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
      con.cur_string << src.shift_char
      next
    end

    break if exit_on_chars && exit_on_chars.include?(c)
    break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}

    # check if there are extensions
    if check_span_extensions(src, con)
      next
    end

    case c = src.cur_char
    when ?\s # it's space (32)
      # hard line break: two spaces followed by a newline (3 characters,
      # matching the 3 characters skipped)
      if src.cur_chars_are "  \n"
        src.ignore_chars(3)
        con.push_element md_br()
        next
      else
        src.ignore_char
        con.push_space
      end
    when ?\n, ?\t
      src.ignore_char
      con.push_space
    when ?`
      read_inline_code(src,con)
    when ?<
      # It could be:
      # 1) HTML "<div ..."
      # 2) HTML "<!-- ..."
      # 3) url "<http:// ", "<ftp:// ..."
      # 4) email "<andrea@... ", "<mailto:andrea@..."
      # 5) on itself! "a < b "
      # 6) Start of <<guillemettes>>
      case d = src.next_char
      when ?<; # guillemettes
        src.ignore_chars(2)
        con.push_char ?<
        con.push_char ?<
      when ?!;
        if src.cur_chars_are '<!--'
          read_inline_html(src, con)
        else
          con.push_char src.shift_char
        end
      when ??
        read_xml_instr_span(src, con)
      when ?\s, ?\t
        con.push_char src.shift_char
      else
        if src.next_matches(/<mailto:/) or
           src.next_matches(/<[\w\.]+\@/)
          read_email_el(src, con)
        elsif src.next_matches(/<\w+:/)
          read_url_el(src, con)
        elsif src.next_matches(/<\w/)
          #puts "This is HTML: #{src.cur_chars(20)}"
          read_inline_html(src, con)
        else
          #puts "This is NOT HTML: #{src.cur_chars(20)}"
          con.push_char src.shift_char
        end
      end
    when ?\\
      d = src.next_char
      if d == ?'
        src.ignore_chars(2)
        con.push_element md_entity('apos')
      elsif d == ?"
        src.ignore_chars(2)
        con.push_element md_entity('quot')
      elsif escaped.include? d
        src.ignore_chars(2)
        con.push_char d
      else
        con.push_char src.shift_char
      end
    when ?[
      if markdown_extra? && src.next_char == ?^
        read_footnote_ref(src,con)
      else
        read_link(src, con)
      end
    when ?!
      if src.next_char == ?[
        read_image(src, con)
      else
        con.push_char src.shift_char
      end
    when ?&
      if m = src.read_regexp(/\&([\w\d]+);/)
        con.push_element md_entity(m[1])
      else
        con.push_char src.shift_char
      end
    when ?*
      if not src.next_char
        maruku_error "Opening * as last char.", src, con
        maruku_recover "Threating as literal", src, con
        con.push_char src.shift_char
      else
        follows = src.cur_chars(4)
        if follows =~ /^\*\*\*[^\s\*]/
          con.push_element read_emstrong(src,'***')
        elsif follows =~ /^\*\*[^\s\*]/
          con.push_element read_strong(src,'**')
        elsif follows =~ /^\*[^\s\*]/
          con.push_element read_em(src,'*')
        else # * is just a normal char
          con.push_char src.shift_char
        end
      end
    when ?_
      if not src.next_char
        maruku_error "Opening _ as last char", src, con
        maruku_recover "Threating as literal", src, con
        con.push_char src.shift_char
      else
        follows = src.cur_chars(4)
        if follows =~ /^\_\_\_[^\s\_]/
          con.push_element read_emstrong(src,'___')
        elsif follows =~ /^\_\_[^\s\_]/
          con.push_element read_strong(src,'__')
        elsif follows =~ /^\_[^\s\_]/
          con.push_element read_em(src,'_')
        else # _ is just a normal char
          con.push_char src.shift_char
        end
      end
    when ?{ # extension
      src.ignore_char # {
      interpret_extension(src, con, [?}])
      src.ignore_char # }
    when nil
      # end of input reached while a delimiter was still expected
      waiting_chars = exit_on_chars ? "#{exit_on_chars.inspect} or" : ""
      maruku_error "Unclosed span (waiting for #{waiting_chars}"+
        "#{exit_on_strings.inspect})", src, con
      break
    else # normal text
      con.push_char src.shift_char
    end # end case
  end # end while true

  con.push_string_if_present

  # Assign IAL to elements
  merge_ial(con.elements, src, con)

  # Remove leading space
  if (s = con.elements.first).kind_of? String
    if s[0] == ?\s then con.elements[0] = s[1, s.size-1] end
    con.elements.shift if s.size == 0
  end

  # Remove final spaces
  if (s = con.elements.last).kind_of? String
    s.chop! if s[-1] == ?\s
    con.elements.pop if s.size == 0
  end

  educated = educate(con.elements)
  educated
end
# Reads an XML processing instruction "<?target code ?>" with the cursor
# on its "<", and pushes an md_xml_instr(target, code) element on +con+.
# The target is the first run of word characters ('' if there is none);
# the code is everything up to "?>", stripped ('' if empty).
def read_xml_instr_span(src, con)
  src.ignore_chars(2) # starting <?
  name_match = src.read_regexp(/(\w+)/)
  target = name_match ? name_match[1] : ''
  terminator = "?>"
  raw = read_simple(src, [], [], [terminator])
  src.ignore_chars(terminator.size)
  con.push_element md_xml_instr(target, raw ? raw.strip : "")
end
# Start: cursor on character **after** '{'
# End: cursor on '}' or EOF
#
# Interprets the content of a "{...}" extension. A leading ":" is skipped;
# a leading "#" or "." is left in place. In every other case the content
# up to +break_on_chars+ is consumed and a recovery notice is emitted
# (naming the leading word or symbol when there is one). In all cases
# extension_meta is then called and its result returned.
def interpret_extension(src, con, break_on_chars)
  first = src.cur_char
  if first == ?:
    src.ignore_char # :
  elsif not [?#, ?.].include?(first)
    stuff = read_simple(src, [?}], break_on_chars, [])
    notice = "I will threat this:\n\t{#{stuff}} \n as meta-data.\n"
    if stuff =~ /^(\w+\s|[^\w])/
      notice = "I don't know what to do with extension '#{$1.strip}'\n" + notice
    end
    maruku_recover notice, src, con
  end
  extension_meta(src, con, break_on_chars)
end
# Reads the attribute list of a "{...}" extension.
#
# When the content starts with "name:", it is an attribute list
# definition: the list is registered in the document (doc.ald[name]) and
# an md_ald element is pushed on +con+.
#
# Otherwise it is an inline attribute list: an md_ial element is pushed
# and nothing is registered in the document (there is no label to
# register it under).
def extension_meta(src, con, break_on_chars)
  if m = src.read_regexp(/([^\s\:]+):/)
    name = m[1]
    al = read_attribute_list(src, con, break_on_chars)
    # puts "#{name}=#{al.inspect}"
    self.doc.ald[name] = al
    con.push md_ald(name, al)
  else
    al = read_attribute_list(src, con, break_on_chars)
    con.push md_ial(al)
  end
end
# Reads an automatic link "<url>" with the cursor on "<" and pushes the
# corresponding md_url element on +con+. Both angle brackets are consumed.
def read_url_el(src,con)
  src.ignore_char # leading <
  address = read_simple(src, [], [?>])
  src.ignore_char # closing >
  con.push_element(md_url(address))
end
# Reads an automatic e-mail link "<address>" or "<mailto:address>" with
# the cursor on "<" and pushes an md_email element for the address
# (without the "mailto:" prefix) on +con+. Both brackets are consumed.
def read_email_el(src,con)
  src.ignore_char # leading <
  raw = read_simple(src, [], [?>])
  src.ignore_char # closing >
  con.push_element(md_email(raw.gsub(/^mailto:/,'')))
end
# Reads a URL up to one of the +break_on+ characters. A URL starting with
# a quote character is reported through +error+. Enclosing angle brackets
# ("<...>") are removed. Returns the URL, or nil when it is empty.
def read_url(src, break_on)
  error 'Invalid char for url', src if [?',?"].include?(src.cur_char)
  raw = read_simple(src, [], break_on) || ""
  bracketed = (raw[0] == ?<) && (raw[-1] == ?>)
  raw = raw[1, raw.size-2] if bracketed
  raw.size == 0 ? nil : raw
end
# Reads a value that may or may not be quoted: if the cursor is on a
# single or double quote the value is read with read_quoted, otherwise
# with read_simple using +escaped+ and +exit_on_chars+.
def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
  if [?', ?"].include?(src.cur_char)
    read_quoted(src, con)
  else
    read_simple(src, escaped, exit_on_chars)
  end
end
# Tries to read a quoted value. If stream does not
# start with ' or ", returns nil.
#
# Otherwise both quotes are consumed and the text between them is
# returned (nil if it is empty); the closing quote is the same character
# as the opening one, and EscapedCharInQuotes can be backslash-escaped.
def read_quoted(src, con)
  return nil unless [?', ?"].include?(src.cur_char)
  opening = src.shift_char # opening quote
  contents = read_simple(src, EscapedCharInQuotes, [opening])
  src.ignore_char # closing quote
  contents
end
# Reads a simple string (no formatting) until one of break_on_chars,
# while escaping the escaped.
# If the string is empty, it returns nil.
#
# src::             character source
# escaped::         characters that lose their preceding backslash
# exit_on_chars::   characters that stop the read (may be nil); the
#                   delimiter itself is not consumed
# exit_on_strings:: strings that stop the read (may be nil)
#
# If the input ends before any delimiter is found, the problem is
# reported through maruku_error / maruku_recover and what was read so far
# is returned.
def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil)
  text = ""
  while true
    # puts "Reading simple #{text.inspect}"
    c = src.cur_char
    break if exit_on_chars && exit_on_chars.include?(c)
    break if exit_on_strings &&
             exit_on_strings.any? {|x| src.cur_chars_are x}

    case c
    when nil
      # Build the list of delimiters for the message; exit_on_chars may
      # be nil or contain nil (end of input), neither of which can be
      # appended to a string.
      wanted = (exit_on_chars || []).compact.map{|x| "" << x} +
               (exit_on_strings || [])
      s = "String finished while reading (break on "+
          "#{wanted.inspect})"+
          " already read: #{text.inspect}"
      maruku_error s, src
      maruku_recover "I boldly continue", src
      break
    when ?\\
      d = src.next_char
      if escaped.include? d
        src.ignore_chars(2)
        text << d
      else
        text << src.shift_char
      end
    else
      text << src.shift_char
    end
  end
  # puts "Read simple #{text.inspect}"
  text.empty? ? nil : text
end
# Reads emphasised text delimited by +delim+ (one character on each
# side is skipped) and returns it wrapped by md_em.
def read_em(src, delim)
  src.ignore_char
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_char
  md_em(inner)
end
# Reads strong text delimited by +delim+ (two characters on each side
# are skipped) and returns it wrapped by md_strong.
def read_strong(src, delim)
  src.ignore_chars(2)
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(2)
  md_strong(inner)
end
# Reads text that is both emphasised and strong, delimited by +delim+
# (three characters on each side are skipped), and returns it wrapped by
# md_emstrong.
def read_emstrong(src, delim)
  src.ignore_chars(3)
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(3)
  md_emstrong(inner)
end
# The space character, as a character literal.
SPACE = ?\ # = 32
# R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
# Matches the inside of a bracketed reference id: capture 1 is the id
# (no whitespace, no "]"), capture 2 is optional whitespace plus the
# closing bracket.
R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
# Reads a bracketed id "[refid]". Consumes also both brackets.
# Leading whitespace inside the brackets is skipped. Returns the id
# (possibly empty), or nil when the input does not match R_REF_ID.
def read_ref_id(src, con)
  src.ignore_char # [
  src.consume_whitespace
  found = src.read_regexp(R_REF_ID)
  found ? found[1] : nil
end
# Reads a footnote reference id with read_ref_id and pushes the
# corresponding md_foot_ref element on +con+.
def read_footnote_ref(src,con)
  footnote_id = read_ref_id(src, con)
  con.push_element(md_foot_ref(footnote_id))
end
# Reads inline HTML starting at the cursor, using an HTMLHelper fed with
# the rest of the buffer. What the helper consumed is pushed on +con+ as
# one md_html element and skipped in +src+. If the helper consumed
# nothing, or raised, the current character is pushed as plain text
# instead (after reporting the problem in the latter case).
def read_inline_html(src, con)
  helper = HTMLHelper.new
  begin
    # This is our current buffer in the context
    buffer = src.current_remaining_buffer
    helper.eat_this buffer
    unless helper.is_finished?
      error "inline_html: Malformed:\n "+
        "#{buffer.inspect}\n #{helper.inspect}",src,con
    end

    eaten = buffer.size - helper.rest.size
    if eaten <= 0
      puts "HTML helper did not work on #{buffer.inspect}"
      con.push_char src.shift_char
    else
      con.push_element md_html(helper.stuff_you_read)
      src.ignore_chars(eaten)
    end
  rescue Exception => e
    maruku_error "Bad html: \n" +
      add_tabs(e.inspect+e.backtrace.join("\n"),1,'>'),
      src,con
    maruku_recover "I will try to continue after bad HTML.", src, con
    con.push_char src.shift_char
  end
end
# Reads an inline code span with the cursor on its first backtick and
# pushes an md_code element on +con+.
#
# The span is delimited by the same number of backticks on both sides.
# When it is delimited by more than one backtick, at most one space is
# removed from each end of the code. An empty span yields md_code("").
def read_inline_code(src, con)
  # Count the number of ticks
  num_ticks = 0
  while src.cur_char == ?`
    num_ticks += 1
    src.ignore_char
  end
  # We will read until this string
  end_string = "`"*num_ticks

  # read_simple returns nil for an empty read
  code = read_simple(src, [], [], [end_string]) || ""

  # puts "Now I expects #{num_ticks} ticks: #{src.cur_chars(10).inspect}"
  src.ignore_chars num_ticks

  # Ignore at most one space
  if num_ticks > 1 && code[0] == SPACE
    code = code[1, code.size-1]
  end

  # drop last space
  if num_ticks > 1 && code[-1] == SPACE
    code = code[0,code.size-1]
  end

  # puts "Read `` code: #{code.inspect}; after: #{src.cur_chars(10).inspect} "
  con.push_element md_code(code)
end
# Reads a link with the cursor on its opening "[" and pushes the result
# on +con+. Three forms are recognised after the bracketed text (one
# space is allowed before "(" or "["):
#
# [text](url "title"):: inline link -> md_im_link(children, url, title);
#                       an empty url is accepted, the title is optional
#                       and must be quoted
# [text][id]::          reference link -> md_link(children, id); an empty
#                       id means "use the text as id"
# [text]::              reference link whose id is the text itself
#
# Ids are downcased; ids derived from the text also get spaces replaced
# by "_". When the inline form is not closed by ")" or the reference id
# cannot be read, the problem is reported and the bracketed text is
# pushed back as ordinary content instead of a link.
def read_link(src, con)
  # we read the string and see what happens
  src.ignore_char # opening bracket
  children = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket

  # ignore space
  if src.cur_char == SPACE and
     (src.next_char == ?[ or src.next_char == ?( )
    src.shift_char
  end

  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE,?\t,?)])
    if not url
      url = '' # no url is ok
    end
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src,con)

      if not title
        maruku_error 'Must quote title',src,con
      elsif not src.next_matches(/\s*\)/)
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # this is the most ugly thing in Markdown
        # if there is not a closing par ), then read
        # the rest and guess it's title with quotes
        rest = read_simple(src, [], [?)], [])
        # rest is nil when the input ended right after the quote; the
        # missing ")" is then reported below
        if rest
          # chop the closing char
          rest.chop!
          title << quote_char << rest
        end
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      maruku_error 'Unclosed link',src,con
      maruku_recover "No closing ): I will not create"+
        " the link for #{children.inspect}", src, con
      con.push_elements children
      return
    end
    con.push_element md_im_link(children,url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src,con)
    if ref_id
      if ref_id.size == 0
        ref_id = children.to_s.downcase.gsub(' ','_')
      else
        ref_id = ref_id.downcase
      end
      con.push_element md_link(children, ref_id)
    else
      maruku_error "Could not read ref_id", src, con
      maruku_recover "I will not create the link for "+
        "#{children.inspect}", src, con
      con.push_elements children
      return
    end
  else # empty [link]
    id = children.to_s.downcase.gsub(' ','_')
    con.push_element md_link(children, id)
  end
end # read link
# Reads an image with the cursor on its opening "![" and pushes the
# result on +con+. Three forms are recognised after the bracketed
# alternative text (one space is allowed before "(" or "["):
#
# ![alt](url "title"):: inline image -> md_im_image(alt, url, title);
#                       the title is optional and must be quoted
# ![alt][id]::          reference image -> md_image(alt, id); an empty id
#                       means "use the alternative text as id"
# ![alt]::              reference image whose id is the alternative text
#
# Ids are downcased; ids derived from the text also get spaces replaced
# by "_". When the reference id cannot be read, the problem is reported
# and the alternative text is pushed back as ordinary content, as
# read_link does for links.
def read_image(src, con)
  src.ignore_chars(2) # opening "!["
  alt_text = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket

  # ignore space
  if src.cur_char == SPACE and
     (src.next_char == ?[ or src.next_char == ?( )
    src.ignore_char
  end

  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE,?\t,?)])
    if not url
      error "Could not read url from #{src.cur_chars(10).inspect}",
        src,con
    end
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      quote_char = src.cur_char
      title = read_quoted(src,con)

      if not title
        maruku_error 'Must quote title',src,con
      elsif not src.next_matches(/\s*\)/)
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # this is the most ugly thing in Markdown
        # if there is not a closing par ), then read
        # the rest and guess it's title with quotes
        rest = read_simple(src, [], [?)], [])
        # rest is nil when the input ended right after the quote; the
        # missing ")" is then reported below
        if rest
          # chop the closing char
          rest.chop!
          title << quote_char << rest
        end
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      # closing is nil at end of input and cannot be appended as is
      found = closing ? ("" << closing) : ""
      error "Unclosed link: '#{found}'"+
        " Read url=#{url.inspect} title=#{title.inspect}",src,con
    end
    con.push_element md_im_image(alt_text, url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src,con)
    if not ref_id
      maruku_error "Could not read ref_id", src, con
      maruku_recover "I will not create the image for "+
        "#{alt_text.inspect}", src, con
      con.push_elements alt_text
      return
    end
    if ref_id.size == 0
      ref_id = alt_text.to_s.downcase.gsub(' ','_')
    else
      ref_id = ref_id.downcase
    end
    con.push_element md_image(alt_text, ref_id)
  else # no stuff
    ref_id = alt_text.to_s.downcase.gsub(' ','_')
    con.push_element md_image(alt_text, ref_id)
  end
end # read link
# Accumulator used while reading span-level elements: finished items
# (Strings and MDElements) are kept in +elements+, while the text being
# read is collected in +cur_string+ until an element is pushed or
# push_string_if_present is called.
class SpanContext
  include MaRuKu::Strings

  # Read elements
  attr_accessor :elements
  # Text read so far and not yet moved to +elements+
  attr_accessor :cur_string

  def initialize
    @elements = []
    @cur_string = ""
  end

  # Appends +e+ (a String or an MDElement) after flushing the pending
  # text. Raises a RuntimeError for any other kind of object.
  def push_element(e)
    unless e.kind_of?(String) or e.kind_of?(MDElement)
      raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} "
    end
    push_string_if_present
    @elements << e
    nil
  end

  alias push push_element

  # Pushes every item of +a+: Strings are appended byte by byte to the
  # pending text, anything else goes through push_element.
  def push_elements(a)
    a.each do |item|
      if item.kind_of? String
        item.each_byte { |b| push_char b }
      else
        push_element item
      end
    end
  end

  # Moves the pending text, if any, to +elements+.
  def push_string_if_present
    if @cur_string.size > 0
      @elements << @cur_string
      @cur_string = ""
    end
    nil
  end

  # Appends one character to the pending text.
  def push_char(c)
    @cur_string << c
    nil
  end

  # push space into current string if
  # there isn't one
  # (the space literal is written ?\s so that it does not depend on
  # invisible trailing whitespace in this file)
  def push_space
    @cur_string << ?\s if @cur_string[-1] != ?\s
  end

  # Human-readable dump of the elements read and of the pending text.
  def describe
    lines = @elements.map{|x| x.inspect}.join("\n")
    s = "Elements read in span: \n" +
      add_tabs(lines,1, ' -')+"\n"
    if @cur_string.size > 0
      s += "Current string: \n #{@cur_string.inspect}\n"
    end
    s
  end
end # SpanContext
end end end end # module MaRuKu; module In; module Markdown; module SpanLevelParser

View file

@ -0,0 +1,225 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
#
# NOTA BENE:
#
# The following algorithm is a rip-off of RubyPants written by
# Christian Neukirchen.
#
# RubyPants is a Ruby port of SmartyPants written by John Gruber.
#
# This file is distributed under the GPL, which I guess is compatible
# with the terms of the RubyPants license.
#
# -- Andrea Censi
# = RubyPants -- SmartyPants ported to Ruby
#
# Ported by Christian Neukirchen <mailto:chneukirchen@gmail.com>
# Copyright (C) 2004 Christian Neukirchen
#
# Incorporates ideas, comments and documentation by Chad Miller
# Copyright (C) 2004 Chad Miller
#
# Original SmartyPants by John Gruber
# Copyright (C) 2003 John Gruber
#
#
# = RubyPants -- SmartyPants ported to Ruby
#
#
# [snip]
#
# == Authors
#
# John Gruber did all of the hard work of writing this software in
# Perl for Movable Type and almost all of this useful documentation.
# Chad Miller ported it to Python to use with Pyblosxom.
#
# Christian Neukirchen provided the Ruby port, as a general-purpose
# library that follows the *Cloth API.
#
#
# == Copyright and License
#
# === SmartyPants license:
#
# Copyright (c) 2003 John Gruber
# (http://daringfireball.net)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# * Neither the name "SmartyPants" nor the names of its contributors
# may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# This software is provided by the copyright holders and contributors
# "as is" and any express or implied warranties, including, but not
# limited to, the implied warranties of merchantability and fitness
# for a particular purpose are disclaimed. In no event shall the
# copyright owner or contributors be liable for any direct, indirect,
# incidental, special, exemplary, or consequential damages (including,
# but not limited to, procurement of substitute goods or services;
# loss of use, data, or profits; or business interruption) however
# caused and on any theory of liability, whether in contract, strict
# liability, or tort (including negligence or otherwise) arising in
# any way out of the use of this software, even if advised of the
# possibility of such damage.
#
# === RubyPants license
#
# RubyPants is a derivative work of SmartyPants and smartypants.py.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# This software is provided by the copyright holders and contributors
# "as is" and any express or implied warranties, including, but not
# limited to, the implied warranties of merchantability and fitness
# for a particular purpose are disclaimed. In no event shall the
# copyright owner or contributors be liable for any direct, indirect,
# incidental, special, exemplary, or consequential damages (including,
# but not limited to, procurement of substitute goods or services;
# loss of use, data, or profits; or business interruption) however
# caused and on any theory of liability, whether in contract, strict
# liability, or tort (including negligence or otherwise) arising in
# any way out of the use of this software, even if advised of the
# possibility of such damage.
#
#
# == Links
#
# John Gruber:: http://daringfireball.net
# SmartyPants:: http://daringfireball.net/projects/smartypants
#
# Chad Miller:: http://web.chad.org
#
# Christian Neukirchen:: http://kronavita.de/chris
module MaRuKu; module In; module Markdown; module SpanLevelParser
# Source of a character class matching one punctuation character; it is
# interpolated into the regular expressions below.
Punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'
# Source of a character class matching any character after which a quote
# is treated as a closing quote.
Close_class = %![^\ \t\r\n\\[\{\(\-]!
# Ordered list of typographic substitutions applied by +educate+.
# Each entry is [pattern, replacement]: the pattern is a Regexp or a
# literal String, the replacement an entity name or a list of them; the
# special name :one stands for the first capture of the pattern. The
# final +map+ normalises every entry to [Regexp, Array].
# Order matters: earlier rules see the text first.
Rules = [
[/---/, :mdash ],
[/--/, :ndash ],
['...', :hellip ],
['. . .', :hellip ],
["``", :ldquo ],
["''", :rdquo ],
[/<<\s/, [:laquo, :nbsp] ],
[/\s>>/, [:nbsp, :raquo] ],
[/<</, :laquo ],
[/>>/, :raquo ],
# def educate_single_backticks(str)
# ["`", :lsquo]
# ["'", :rsquo]
# Special case if the very first character is a quote followed by
# punctuation at a non-word-break. Close the quotes by brute
# force:
[/^'(?=#{Punct_class}\B)/, :rsquo],
[/^"(?=#{Punct_class}\B)/, :rdquo],
# Special case for double sets of quotes, e.g.:
# <p>He said, "'Quoted' words in a larger quote."</p>
[/"'(?=\w)/, [:ldquo, :lsquo] ],
[/'"(?=\w)/, [:lsquo, :ldquo] ],
# Special case for decade abbreviations (the '80s):
[/'(?=\d\ds)/, :rsquo ],
# Get most opening single quotes:
[/(\s)'(?=\w)/, [:one, :lsquo] ],
# Single closing quotes:
[/(#{Close_class})'/, [:one, :rsquo]],
[/'(\s|s\b|$)/, [:rsquo, :one]],
# Any remaining single quotes should be opening ones:
[/'/, :lsquo],
# Get most opening double quotes:
[/(\s)"(?=\w)/, [:one, :ldquo]],
# Double closing quotes:
[/(#{Close_class})"/, [:one, :rdquo]],
[/"(\s|s\b|$)/, [:rdquo, :one]],
# Any remaining quotes should be opening ones:
[/"/, :ldquo]
].
map{|reg, subst| # People should do the thinking, machines should do the work.
reg = Regexp.new(Regexp.escape(reg)) if not reg.kind_of? Regexp
subst = [subst] if not subst.kind_of?Array
[reg, subst]}
# note: input will be destroyed
#
# Applies one substitution rule to a list of Strings and elements.
# Every match of +reg+ inside a String is replaced by the items listed in
# +subst+ (:one stands for the first capture, any other name becomes an
# md_entity); the text around the match is kept, and the text after it is
# examined again. Non-String items are copied unchanged.
# Returns the new list; +input+ is emptied in the process.
def apply_one_rule(reg, subst, input)
  done = []
  while (item = input.shift)
    hit = item.kind_of?(String) ? reg.match(item) : nil
    unless hit
      done.push item
      next
    end
    before, after = hit.pre_match, hit.post_match
    done.push before if before.size > 0
    # what follows the match goes back on the queue, behind the
    # replacement, so that further matches are found
    input.unshift after if after.size > 0
    replacement = subst.map { |x| x == :one ? hit[1] : md_entity(x.to_s) }
    input.unshift(*replacement)
  end
  done
end
def educate(elements)
Rules.each do |reg, subst|
elements = apply_one_rule(reg, subst, elements)
end
# strips empty strings
elements.delete_if {|x| x.kind_of?(String) && x.size == 0}
final = []
# join consecutive strings
elements.each do |x|
if x.kind_of?(String) && final.last.kind_of?(String)
final.last << x
else
final << x
end
end
return final
end
end end end end

View file

@ -0,0 +1,141 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
class String
  include MaRuKu::Strings

  # Returns the block-level type of this line as computed by line_md_type;
  # the result is cached in @md_type after the first call.
  def md_type
    @md_type = line_md_type(self) unless @md_type
    @md_type
  end
end
class NilClass
  # nil has no line type: always returns nil.
  def md_type
    nil
  end
end
# This code does the classification of lines for block-level parsing.
module MaRuKu; module Strings
# Classifies one source line +l+ and returns a symbol naming its kind
# (:text, :code, :empty, :footnote_text, :ref_definition, :abbreviation,
# :definition, :xml_instr, :raw_html, :ulist, :olist, :header1, :header2,
# :header3, :hrule, :quote, :metadata, :ald or :ial).
# The tests are tried top to bottom and the first match wins, so their
# order is part of the behavior.
def line_md_type(l)
# The order of evaluation is important (:text is a catch-all)
return :text if l =~ /^[a-zA-Z]/
return :code if number_of_leading_spaces(l)>=4
return :empty if l =~ /^\s*$/
return :footnote_text if l =~ FootnoteText
return :ref_definition if l =~ LinkRegex or l=~ IncompleteLink
return :abbreviation if l =~ Abbreviation
return :definition if l =~ Definition
# I had a bug with emails and urls at the beginning of the
# line that were mistaken for raw_html
return :text if l=~ /^#{EMailAddress}/
return :text if l=~ /^<http:/
# raw html is like PHP Markdown Extra: at most three spaces before
return :xml_instr if l =~ %r{^\s*<\?}
return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?<\!\-\-}
return :ulist if l =~ /^\s?([\*\-\+])\s+.*\w+/
return :olist if l =~ /^\s?\d+\..*\w+/
return :header1 if l =~ /^(=)+/
# NOTE(review): any line made only of hyphens and whitespace matches here,
# before the hyphen :hrule test below is reached.
return :header2 if l =~ /^([-\s])+$/
return :header3 if l =~ /^(#)+\s*\S+/
# at least three asterisks on a line, and only whitespace
return :hrule if l =~ /^(\s*\*\s*){3,1000}$/
return :hrule if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens
return :hrule if l =~ /^(\s*_\s*){3,1000}$/ # or underscores
return :quote if l =~ /^>/
return :metadata if l =~ /^@/
# if @@new_meta_data?
return :ald if l =~ AttributeDefinitionList
return :ial if l =~ InlineAttributeList
# end
# return :equation_end if l =~ EquationEnd
return :text # else, it's just text
end
# Matches "{id}: attribute list" with up to three leading spaces.
# $1 = id $2 = attribute list
AttributeDefinitionList = /^\s{0,3}\{([\w\d\s]+)\}:\s*(.*)\s*$/
# Matches a line consisting only of "{...}" (up to three leading spaces);
# $1 = the text between the braces.
InlineAttributeList = /^\s{0,3}\{(.*)\}\s*$/
# Example:
# ^:blah blah
# ^: blah blah
# ^ : blah blah
Definition = %r{
^ # begin of line
[ ]{0,3} # up to 3 spaces
: # colon
\s* # whitespace
(\S.*) # the text = $1
$ # end of line
}x
# Example:
# *[HTML]: Hyper Text Markup Language
Abbreviation = %r{
^ # begin of line
\* # one asterisk
\[ # opening bracket
([^\]]+) # any non-closing bracket: id = $1
\] # closing bracket
: # colon
\s* # whitespace
(\S.*\S)* # definition=$2
\s* # strip this whitespace
$ # end of line
}x
# Matches the first line of a footnote body: "[^id]: optional text".
FootnoteText = %r{
^\s*\[(\^.+)\]: # id = $1 (including '^')
\s*(\S.*)?$ # text = $2 (not obb.)
}x
# This regex is taken from BlueCloth sources
# Link defs are in the form: ^[id]: \n? url "optional title"
LinkRegex = %r{
^[ ]{0,3}\[([^\[\]]+)\]: # id = $1
[ ]*
<?(\S+)>? # url = $2
[ ]*
(?:# Titles are delimited by "quotes" or (parens).
["(']
(.+?) # title = $3
[")'] # Matching ) or "
\s*(.+)? # stuff = $4
)? # title is optional
}x
# A link definition that has only the "[id]:" part on this line; $1 = id.
IncompleteLink = %r{^[ ]{0,3}\[([^\[\]]+)\]:\s*$}
# Header text followed by "{#id}": $1 = text, $2 = id.
HeaderWithId = /^(.*)\{\#([\w_-]+)\}\s*$/
# Header text followed by "{...}": $1 = text, $2 = text between the braces.
HeaderWithAttributes = /^(.*)\{(.*)\}\s*$/
# if contains a pipe, it could be a table header
MightBeTableHeader = %r{\|}
# -------------:
# One column of a table separator row: dashes with an optional colon on
# either side ($1 = leading colon, $2 = trailing colon).
Sep = /\s*(\:)?\s*-+\s*(\:)?\s*/
# | -------------:| ------------------------------ |
TableSeparator = %r{^(\|?#{Sep}\|?)+\s*$}
# An address in angle brackets, "<something@something>" with no colon;
# $1 = the address without the brackets.
EMailAddress = /<([^:]+@[^:]+)>/
end end

View file

@ -0,0 +1,33 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
# The Maruku class is the public interface
class Maruku
  # Builds a document.
  #
  # s::    Markdown source to parse; when nil (the default) nothing is parsed.
  # meta:: hash merged into the document's attributes before parsing.
  def initialize(s=nil, meta={})
    super(nil)
    self.attributes.merge!(meta)
    parse_doc(s) if s
  end
end

View file

@ -0,0 +1,862 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'rexml/document'
class String
  # Renders this string for HTML output by wrapping it in a REXML::Text
  # node, leaving all escaping to REXML.
  def to_html()
    REXML::Text.new(self)
  end
end
class REXML::Element
# We only want to output the children in Maruku::to_html
# (to_html renders into a throw-away wrapper element and serializes only
# its children, so write_children has to be callable from outside).
public :write_children
end
# This module groups all functions related to HTML export.
module MaRuKu; module Out; module HTML
include REXML
# Render as an HTML fragment (no head, just the content of BODY). (returns a string)
# Render as an HTML fragment (no head, just the content of BODY).
# Returns a String.
#
# context:: options hash:
#   :indent  - indentation passed to REXML (default -1)
#   :ie_hack - passed to REXML's writer (default true)
def to_html(context={})
  indent = context[:indent] || -1
  # `context[:ie_hack] || true` was always true, so an explicit
  # `:ie_hack => false` was silently ignored. Only a missing/nil value
  # falls back to the default now.
  ie_hack = context[:ie_hack]
  ie_hack = true if ie_hack.nil?
  # Throw-away wrapper: only its children are serialized below.
  div = Element.new 'dummy'
  children_to_html.each do |e|
    div << e
  end
  # render footnotes
  if @doc.footnotes_order.size > 0
    div << render_footnotes
  end
  doc = Document.new(nil,{:respect_whitespace =>:all})
  doc << div
  # REXML Bug? if indent!=-1 whitespace is not respected for 'pre' elements
  # containing code.
  xml = ""
  div.write_children(xml, indent, true, ie_hack)
  xml
end
# Render to a complete HTML document (returns a string)
# Render to a complete HTML document: an XML declaration and DOCTYPE
# followed by the serialized tree from to_html_document_tree.
# Returns a String.
#
# context:: options hash:
#   :indent  - indentation passed to REXML (default -1)
#   :ie_hack - passed to REXML's writer (default true)
def to_html_document(context={})
  indent = context[:indent] || -1
  # `context[:ie_hack] || true` was always true, so an explicit
  # `:ie_hack => false` was silently ignored.
  ie_hack = context[:ie_hack]
  ie_hack = true if ie_hack.nil?
  doc = to_html_document_tree
  xml = ""
  # REXML Bug? if indent!=-1 whitespace is not respected for 'pre' elements
  # containing code.
  doc.write(xml, indent, true, ie_hack)
  # Prolog for XHTML 1.1 + MathML 2.0 + SVG 1.1. (Two alternative prologs,
  # XHTML 1.0 Strict and XHTML 1.1 + MathML 2.0, used to be built here but
  # were never used; they have been dropped.)
  xhtml11_mathml2_svg11 =
'<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC
"-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
"http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
'
  xhtml11_mathml2_svg11 + xml
end
# Returns a fresh text node holding a single newline.
def xml_newline
  Text.new("\n")
end
=begin maruku_doc
Attribute: title
Scope: document
Sets the title of the document.
If a title is not specified, the first header will be used.
These should be equivalent:
Title: my document
Content
and
my document
===========
Content
In both cases, the title is set to "my document".
=end
=begin maruku_doc
Attribute: subject
Scope: document
Synonym for `title`.
=end
=begin maruku_doc
Attribute: css
Scope: document
Output: HTML
Summary: Activates CSS stylesheets for HTML.
`css` should be a space-separated list of urls.
Example:
CSS: style.css math.css
=end
# Render to a complete HTML document (returns a REXML document tree)
# Builds the complete document as a REXML tree and returns the Document:
# <html> (XHTML + SVG namespaces, xml:lang) containing <head> (content-type
# meta, title, one stylesheet link per url in the :css attribute) and <body>
# (rendered children, footnotes if any, optional Maruku signature).
def to_html_document_tree
doc = Document.new(nil,{:respect_whitespace =>:all})
# doc << XMLDecl.new
root = Element.new('html', doc)
root.add_namespace('http://www.w3.org/1999/xhtml')
root.add_namespace('svg', "http://www.w3.org/2000/svg" )
# Document language: the :lang attribute, or 'en' when not set.
lang = self.attributes[:lang] || 'en'
root.attributes['xml:lang'] = lang
root << xml_newline
head = Element.new 'head', root
#<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
me = Element.new 'meta', head
me.attributes['http-equiv'] = 'Content-type'
# me.attributes['content'] = 'text/html;charset=utf-8'
me.attributes['content'] = 'application/xhtml+xml;charset=utf-8'
# Create title element
# (:title wins over :subject; an empty title is used when neither is set)
doc_title = self.attributes[:title] || self.attributes[:subject] || ""
title = Element.new 'title', head
title << Text.new(doc_title)
# :css is a whitespace-separated list of stylesheet urls.
if css_list = self.attributes[:css]
css_list.split.each do |css|
# <link type="text/css" rel="stylesheet" href="..." />
link = Element.new 'link'
link.attributes['type'] = 'text/css'
link.attributes['rel'] = 'stylesheet'
link.attributes['href'] = css
head << link
head << xml_newline
end
end
root << xml_newline
body = Element.new 'body'
children_to_html.each do |e|
body << e
end
# render footnotes
if @doc.footnotes_order.size > 0
body << render_footnotes
end
# When we are rendering a whole document, we add a signature
# at the bottom.
if get_setting(:maruku_signature)
body << maruku_html_signature
end
root << body
doc
end
# returns "st","nd","rd" or "th" as appropriate
# Returns the English ordinal suffix for a day of the month:
# "st" for 1/21/31, "nd" for 2/22, "rd" for 3/23, "th" for anything else.
def day_suffix(day)
  suffix_for = Hash.new('th')
  [1, 21, 31].each { |d| suffix_for[d] = 'st' }
  [2, 22].each { |d| suffix_for[d] = 'nd' }
  [3, 23].each { |d| suffix_for[d] = 'rd' }
  suffix_for[day]
end
# formats a nice date
# Formats the current time as e.g. " at 23:03 on Saturday, February 10th, 2007"
# (note the leading space; the day number comes from strftime's %d).
def nice_date
  now = Time.now
  now.strftime(" at %H:%M on %A, %B %d") + day_suffix(now.day) + now.strftime(", %Y")
end
# Builds the "Created by Maruku at <date>." footer:
# a div.maruku_signature holding an <hr> and a small italic <span>.
# Returns the div element.
def maruku_html_signature
  box = Element.new 'div'
  box.attributes['class'] = 'maruku_signature'
  Element.new 'hr', box
  note = Element.new 'span', box
  note.attributes['style'] = 'font-size: small; font-style: italic'
  note << Text.new('Created by ')
  # The link is attached to the span on creation, i.e. after 'Created by '.
  link = Element.new('a', note)
  link.attributes['href'] = 'http://maruku.rubyforge.org'
  link.attributes['title'] = 'Maruku: a Markdown-superset interpreter for Ruby'
  link << Text.new('Maruku')
  note << Text.new(nice_date+".")
  box
end
# Builds the footnotes section: a div.footnotes with an <hr> and an <ol>
# holding one <li id="fn:N"> per entry of @doc.footnotes_order (N counts
# from 1), each ending with a back-link to "#fnref:N". Ids that have no
# footnote are reported through maruku_error and skipped.
def render_footnotes
  container = Element.new 'div'
  container.attributes['class'] = 'footnotes'
  container << Element.new('hr')
  list = Element.new 'ol'
  @doc.footnotes_order.each_with_index do |fid, index|
    num = index + 1
    footnote = self.footnotes[fid]
    unless footnote
      maruku_error"Could not find footnote '#{fid}'"
      next
    end
    item = footnote.wrap_as_element('li')
    item.attributes['id'] = "fn:#{num}"
    back = Element.new 'a'
    back.attributes['href'] = "#fnref:#{num}"
    back.attributes['rev'] = 'footnote'
    back << Text.new('&#8617;', true, nil, true)
    item.insert_after(item.children.last, back)
    list << item
  end
  container << list
  container
end
# Horizontal rule: an <hr> built through create_html_element.
def to_html_hrule
  create_html_element 'hr'
end

# Hard line break: a bare <br> element.
def to_html_linebreak
  Element.new 'br'
end
# renders children as html and wraps into an element of given name
#
# Sets 'id' if meta is set
# Creates an element called +name+ (via create_html_element, which also
# receives +attributes_to_copy+), appends the HTML of every child to it,
# and returns it.
def wrap_as_element(name, attributes_to_copy=[])
  wrapper = create_html_element(name, attributes_to_copy)
  children_to_html.each { |child| wrapper << child }
  wrapper
end
=begin maruku_doc
Attribute: id
Scope: element
Output: LaTeX, HTML
It is copied as a standard HTML attribute.
Moreover, it is used as a label name for hyperlinks in both HTML and
in PDF.
=end
=begin maruku_doc
Attribute: class
Scope: element
Output: HTML
It is copied as a standard HTML attribute.
=end
=begin maruku_doc
Attribute: style
Scope: element
Output: HTML
It is copied as a standard HTML attribute.
=end
# Attributes copied onto every element built by create_html_element.
StandardAttributes = [:id, :style, :class]

# Returns a new element called +name+. For each of StandardAttributes plus
# +attributes_to_copy+, a value present in @attributes is copied onto the
# element (key and value both converted with to_s).
def create_html_element(name, attributes_to_copy=[])
  element = Element.new name
  (StandardAttributes + attributes_to_copy).each do |key|
    value = @attributes[key]
    element.attributes[key.to_s] = value.to_s if value
  end
  element
end
# Renders an unordered list. A list carrying the :toc attribute is replaced
# by the document's table of contents; otherwise it becomes a <ul> wrapped
# in newlines.
def to_html_ul
  return @doc.toc.to_html if @attributes[:toc]
  add_ws wrap_as_element('ul')
end
# Block-level wrappers: the element is surrounded by newlines (add_ws).
def to_html_paragraph
  add_ws wrap_as_element('p')
end

def to_html_ol
  add_ws wrap_as_element('ol')
end

def to_html_li
  add_ws wrap_as_element('li')
end

def to_html_li_span
  add_ws wrap_as_element('li')
end

def to_html_quote
  add_ws wrap_as_element('blockquote')
end

# Span-level wrappers: the bare element is returned.
def to_html_strong
  wrap_as_element('strong')
end

def to_html_emphasis
  wrap_as_element('em')
end
=begin maruku_doc
Attribute: use_numbered_headers
Scope: document
Summary: Activates the numbering of headers.
If `true`, section headers will be numbered.
In LaTeX export, the numbering of headers is managed
by Maruku, to have the same results in both HTML and LaTeX.
=end
# nil if not applicable, else string
# Returns the section number as text, e.g. "1.2. ", or nil when the
# :use_numbered_headers setting is off or @attributes[:section_number]
# is missing or empty.
def section_number
  return nil unless get_setting(:use_numbered_headers)
  parts = @attributes[:section_number]
  return nil if !parts || parts.empty?
  parts.join('.') + ". "
end
# nil if not applicable, else SPAN element
# Returns a <span class="maruku_section_number"> holding the section
# number, or nil when section_number returns nil.
def render_section_number
  # Use the value computed here; the original assigned it to a local and
  # then called section_number a second time for the text.
  num = section_number
  return nil unless num
  span = Element.new 'span'
  span.attributes['class'] = 'maruku_section_number'
  span << Text.new(num)
  span
end
# Renders a header as <hN>, N being self.level. When a section-number span
# is available it is inserted before the first child. The result is
# surrounded by newlines.
def to_html_header
  heading = wrap_as_element "h#{self.level}"
  number_span = render_section_number
  heading.insert_before(heading.children.first, number_span) if number_span
  add_ws heading
end
# Escapes raw source text and returns it as a raw Text node: '&' is
# replaced first, the result goes through Text.normalize, and every
# apostrophe (or "&apos;") ends up as "&#39;" because of an IE bug.
def source2html(source)
  escaped = Text.normalize(source.gsub(/&/,'&amp;'))
  escaped = escaped.gsub(/\&apos;/,'&#39;').gsub(/'/,'&#39;') # IE bug
  Text.new(escaped, true, nil, true)
end
=begin maruku_doc
Attribute: html_use_syntax
Scope: global, document, element
Output: HTML
Summary: Enables the use of the `syntax` package.
Related: lang, code_lang
Default: <?mrk md_code(Globals[:html_use_syntax].to_s) ?>
If true, the `syntax` package is used. It supports the `ruby` and `xml`
languages. Remember to set the `lang` attribute of the code block.
Examples:
require 'maruku'
{:lang=ruby html_use_syntax=true}
and
<div style="text-align:center">Div</div>
{:lang=html html_use_syntax=true}
produces:
require 'maruku'
{:lang=ruby html_use_syntax=true}
and
<div style="text-align:center">Div</div>
{:lang=html html_use_syntax=true}
=end
# Renders a code block. When the :html_use_syntax setting is on and a
# language is known (this element's :lang, else the document's :code_lang;
# 'html' is treated as 'xml'), the `syntax` library is loaded on first use
# and its output becomes a <pre><code> tree. In every other case, and
# whenever the library is missing or raises, to_html_code_using_pre is used.
# The result is surrounded by newlines.
def to_html_code;
source = self.raw_code
lang = self.attributes[:lang] || @doc.attributes[:code_lang]
lang = 'xml' if lang=='html'
use_syntax = get_setting :html_use_syntax
element =
if use_syntax && lang
begin
# Load the library once per process; $syntax_loaded remembers it.
if not $syntax_loaded
require 'rubygems'
require 'syntax'
require 'syntax/convertors/html'
$syntax_loaded = true
end
convertor = Syntax::Convertors::HTML.for_syntax lang
# eliminate trailing newlines otherwise Syntax crashes
source = source.gsub(/\n*\Z/,'')
html = convertor.convert( source )
html = html.gsub(/\&apos;/,'&#39;') # IE bug
html = html.gsub(/'/,'&#39;') # IE bug
# html = html.gsub(/&/,'&amp;')
# Parse the highlighted markup and rename its root element to <code>.
code = Document.new(html, {:respect_whitespace =>:all}).root
code.name = 'code'
code.attributes['class'] = lang
code.attributes['lang'] = lang
pre = Element.new 'pre'
pre << code
pre
rescue LoadError => e
maruku_error "Could not load package 'syntax'.\n"+
"Please install it, for example using 'gem install syntax'."
to_html_code_using_pre(source)
# Any other failure: report it and fall back to the plain rendering.
rescue Object => e
maruku_error"Error while using the syntax library for code:\n#{source.inspect}"+
"Lang is #{lang} object is: \n"+
self.inspect +
"\nException: #{e.class}: #{e.message}\n\t#{e.backtrace.join("\n\t")}"
tell_user("Using normal PRE because the syntax library did not work.")
to_html_code_using_pre(source)
end
else
to_html_code_using_pre(source)
end
# An inline style is added only when the color differs from the global one.
color = get_setting(:code_background_color)
if color != Globals[:code_background_color]
element.attributes['style'] = "background-color: #{color};"
end
add_ws element
end
=begin maruku_doc
Attribute: code_background_color
Scope: global, document, element
Summary: Background color for code blocks.
The format is either a named color (`green`, `red`) or a CSS color
of the form `#ff00ff`.
* for **HTML output**, the value is put straight in the `background-color` CSS
property of the block.
* for **LaTeX output**, if it is a named color, it must be a color accepted
by the LaTeX `color` packages. If it is of the form `#ff00ff`, Maruku
defines a color using the `\color[rgb]{r,g,b}` macro.
For example, for `#0000ff`, the macro is called as: `\color[rgb]{0,0,1}`.
=end
# Plain rendering of a code block: returns <pre><code>…</code></pre> with
# +source+ escaped and inserted as a raw text node. With the
# :code_show_spaces setting, tabs and spaces are replaced by visible
# marker entities. The element's :lang attribute, if any, is copied to the
# <code> element as both `lang` and `class`.
def to_html_code_using_pre(source)
  pre = create_html_element 'pre'
  code = Element.new 'code', pre
  escaped = Text.normalize(source.gsub(/&/,'&amp;'))
  escaped = escaped.gsub(/\&apos;/,'&#39;').gsub(/'/,'&#39;') # IE bug
  if get_setting(:code_show_spaces)
    # 187 = raquo, 160 = nbsp, 172 = not
    escaped = escaped.gsub(/\t/,'&#187;'+'&#160;'*3).gsub(/ /,'&#172;')
  end
  text = Text.new(escaped, true, nil, true)
  lang = self.attributes[:lang]
  if lang
    code.attributes['lang'] = lang
    code.attributes['class'] = lang
  end
  code << text
  pre
end
# Renders inline code as a <code> element containing the escaped source.
# A background-color style is added only when the
# :code_background_color setting differs from the global value.
def to_html_inline_code
  code = create_html_element 'code'
  code << source2html(self.raw_code)
  background = get_setting(:code_background_color)
  unless background == Globals[:code_background_color]
    code.attributes['style'] = "background-color: #{background};"
  end
  code
end
# Adds CSS class +cl+ to element +el+: appended after a space when the
# element already has a class, otherwise set as the only class.
# Returns the new class string.
def add_class_to(el, cl)
  existing = el.attributes['class']
  el.attributes['class'] = existing ? existing + " " + cl : cl
end
# Intended to tag link element +a+ with a class derived from its href:
# 'maruku-link-samedoc' for "#...", 'maruku-link-external' for "http:...",
# 'maruku-link-local' otherwise.
# Currently disabled: the bare `return` on the first line makes this a
# no-op that returns nil; everything after it is unreachable.
def add_class_to_link(a)
return # not ready yet
url = a.attributes['href']
return if not url
if url =~ /^#/
add_class_to(a, 'maruku-link-samedoc')
elsif url =~ /^http:/
add_class_to(a, 'maruku-link-external')
else
add_class_to(a, 'maruku-link-local')
end
# puts a.attributes['class']
end
# Renders an immediate link (a bare url): an <a> whose href is the url and
# whose text is the url without a leading "mailto:".
def to_html_immediate_link
  link = create_html_element 'a'
  target = self.url
  link << Text.new(target.gsub(/^mailto:/,'')) # don't show mailto
  link.attributes['href'] = target
  add_class_to_link(link)
  link
end
# Renders a reference-style link. The reference is looked up in @doc.refs
# by ref_id; its :url and :title (when present) become href and title of
# the <a>. If the reference is unknown, the error is reported and the
# children are wrapped in a <span> instead.
def to_html_link
  anchor = wrap_as_element 'a'
  id = self.ref_id
  ref = @doc.refs[id]
  unless ref
    maruku_error "Could not find ref_id = #{id.inspect} for #{self.inspect}\n"+
      "Available refs are #{@doc.refs.keys.inspect}"
    tell_user "Not creating a link for ref_id = #{id.inspect}."
    return wrap_as_element('span')
  end
  url, title = ref[:url], ref[:title]
  anchor.attributes['href'] = url if url
  anchor.attributes['title'] = title if title
  anchor
end
# Renders an inline link (url given in place): an <a> with href set to
# self.url and, when present, title set to self.title. If there is no url,
# the error is reported and the children are wrapped in a <span>.
def to_html_im_link
  if url = self.url
    title = self.title
    a = wrap_as_element 'a'
    a.attributes['href'] = url
    a.attributes['title'] = title if title
    return a
  else
    maruku_error"Could not find url in #{self.inspect}"
    # The original message interpolated `id`, which is not defined in this
    # method (inline links have no ref_id), so the error path itself failed
    # or printed an unrelated object id. Describe the element instead.
    tell_user "Not creating a link for #{self.inspect}."
    return wrap_as_element('span')
  end
end
# Returns +e+ with a newline text node before and after it, as a
# three-element array.
def add_ws(e)
  leading = Text.new("\n")
  trailing = Text.new("\n")
  [leading, e, trailing]
end
##### Email address
# Encodes every byte of +s+ as a decimal character reference padded to
# three digits, e.g. "a@" becomes "&#097;&#064;".
def obfuscate(s)
  s.unpack('C*').map { |byte| "&#%03d;" % byte }.join
end
# Renders an e-mail address as an <a href="mailto:..."> link. The visible
# text is the address encoded with obfuscate and inserted raw; the href
# itself is left in clear (obfuscating it "doesn't work for the moment").
def to_html_email_address
  address = self.email
  link = create_html_element 'a'
  link.attributes['href'] = "mailto:#{address}"
  link << Text.new(obfuscate(address),false,nil,true)
  link
end
##### Images
# Renders a reference-style image. The reference is looked up in @doc.refs
# by ref_id: its :url becomes src, its :title becomes alt, and its :title,
# :class and :style entries (when present) are copied as attributes.
# If the reference is unknown, the error is reported and the children are
# wrapped in a <span> instead.
def to_html_image
  img = create_html_element 'img'
  id = self.ref_id
  ref = @doc.refs[id]
  unless ref
    maruku_error"Could not find id = #{id.inspect} for\n #{self.inspect}"
    tell_user "Could not create image with ref_id = #{id.inspect};"+
      " Using SPAN element as replacement."
    return wrap_as_element('span')
  end
  img.attributes['src'] = ref[:url].to_s
  img.attributes['alt'] = ref[:title].to_s
  [:title, :class, :style].each do |key|
    img.attributes[key.to_s] = ref[key] if ref[key]
  end
  img
end
# Renders an inline image (url given in place): an <img> with src set to
# self.url and alt set to self.title. If there is no url, the error is
# reported and the children are wrapped in a <span>.
def to_html_im_image
  unless url = self.url
    maruku_error"Image with no url: #{self.inspect}"
    # Two defects in the original message: it interpolated `id`, which is
    # not defined in this method, and it joined the two halves with
    # `"..."+` followed by `+"..."`, i.e. unary plus on a String, which
    # raises NoMethodError on Ruby 1.8.
    tell_user "Could not create image for #{self.inspect};"+
      " Using SPAN element as replacement."
    return wrap_as_element('span')
  end
  title = self.title
  a = create_html_element 'img'
  a.attributes['src'] = url
  a.attributes['alt'] = title.to_s
  return a
end
# Renders a raw HTML block.
# * With a parsed tree in @parsed_html, returns the children of its root as
#   an array (or an empty <div> when the root is nil, after reporting it).
# * Without one (the HTML could not be parsed), returns a red/pink <pre>
#   with class 'markdown-html-error' showing the offending source.
def to_html_raw_html
  raw_html = self.raw_html
  rexml_doc = @parsed_html
  unless rexml_doc
    # invalid: creates red box with offending HTML
    tell_user "Wrapping bad html in a PRE with class 'markdown-html-error'\n"+
      add_tabs(raw_html,1,'|')
    pre = Element.new('pre')
    pre.attributes['style'] = 'border: solid 3px red; background-color: pink'
    pre.attributes['class'] = 'markdown-html-error'
    pre << Text.new("HTML parse error: \n#{raw_html}", true)
    return pre
  end
  root = rexml_doc.root
  if root.nil?
    s = "Bug in REXML: root() of Document is nil: \n#{rexml_doc.inspect}\n"+
      "Raw HTML:\n#{raw_html.inspect}"
    maruku_error s
    tell_user 'The REXML version you have has a bug, omitting HTML'
    return Element.new('div')
  end
  # copies the @children array (FIXME is it deep?)
  root.to_a
end
# Renders an abbreviation as <abbr> containing the first child as text;
# the title attribute is set when self.title is present.
def to_html_abbr
  element = Element.new 'abbr'
  element << Text.new(children[0])
  if self.title
    element.attributes['title'] = self.title
  end
  element
end
# Renders a footnote reference as <sup id="fnref:N"><a href="#fn:N"
# rel="footnote">N</a></sup>. The footnote id is appended to
# @doc.footnotes_order, and N is the size of that list afterwards.
def to_html_footnote_reference
  # save the order of used footnotes and take the next number
  used = @doc.footnotes_order
  used << self.footnote_id
  num = used.size
  marker = Element.new 'sup'
  marker.attributes['id'] = "fnref:#{num}"
  link = Element.new 'a'
  link << Text.new(num.to_s)
  link.attributes['href'] = "\#fn:#{num}"
  link.attributes['rel'] = 'footnote'
  marker << link
  marker
end
## Definition lists ###
# <dl>, surrounded by newlines.
def to_html_definition_list
  add_ws wrap_as_element('dl')
end

# A definition has no element of its own: only its children are rendered.
def to_html_definition
  children_to_html
end

# <dt>, surrounded by newlines.
def to_html_definition_term
  add_ws wrap_as_element('dt')
end

# <dd>, surrounded by newlines.
def to_html_definition_data
  add_ws wrap_as_element('dd')
end
# FIXME: Ugly code
# Renders a table. @children is a flat list of cells: the first
# align.size cells form the header row, the rest are split into body rows
# of the same width. Body cells get a "text-align" style taken from
# self.align by column; header cells are left unstyled.
def to_html_table
  align = self.align
  num_columns = align.size
  head = @children.slice(0, num_columns)
  rows = []
  offset = num_columns
  while offset < @children.size
    rows << @children.slice(offset, num_columns)
    offset += num_columns
  end
  table = create_html_element 'table',
    [:summary, :width, :frame, :rules, :border, :cellspacing, :cellpadding]
  thead = Element.new 'thead'
  header_row = Element.new 'tr'
  array_to_html(head).each { |cell| header_row << cell }
  thead << header_row
  table << thead
  tbody = Element.new 'tbody'
  rows.each do |row|
    tr = Element.new 'tr'
    array_to_html(row).each_with_index do |cell, column|
      cell.attributes['style'] ="text-align: #{align[column].to_s};"
      tr << cell
    end
    tbody << tr << Text.new("\n")
  end
  table << tbody
  table
end
# Header cell: always a <th>.
def to_html_head_cell
  wrap_as_element('th')
end

# Body cell: a <td>, unless the cell has a :scope attribute, in which case
# it is a <th> carrying that scope.
def to_html_cell
  return wrap_as_element('th', [:scope]) if @attributes[:scope]
  wrap_as_element('td')
end
# Renders an entity as a text node. When the LaTeX entity table knows a
# numeric HTML code for the name, the numeric form "&#N;" is emitted (raw);
# otherwise the named form "&name;" is passed to Text.new.
def to_html_entity
  MaRuKu::Out::Latex.need_entity_table
  entity_name = self.entity_name
  if (e = MaRuKu::Out::Latex::ENTITY_TABLE[entity_name]) && e.html_num
    entity_name = e.html_num
  end
  # Fix for Internet Explorer
  if entity_name == 'apos'
    entity_name = 39
  end
  # Integer instead of Fixnum: Fixnum was removed in Ruby 3.2, while
  # Integer is its superclass on older versions, so this matches the same
  # values everywhere.
  if entity_name.kind_of? Integer
    # Entity.new(entity_name)
    Text.new('&#%d;' % [entity_name], false, nil, true)
  else
    Text.new('&%s;' % [entity_name])
  end
end
# Renders an XML processing instruction; a missing target or code is
# replaced by the empty string.
def to_html_xml_instr
  REXML::Instruction.new(self.target || '', self.code || '')
end
# Convert each child to html
# Returns the HTML nodes produced by this element's children, as a flat
# array (see array_to_html).
def children_to_html
  array_to_html @children
end
# Converts each item of +array+ to HTML and returns the results as one
# flat array. MDElement items are rendered through "to_html_<node_type>",
# anything else through "to_html". Items that do not respond to their
# method are skipped; a renderer returning an array contributes all of its
# elements. Raises RuntimeError when a renderer returns nil.
def array_to_html(array)
  rendered = []
  array.each do |node|
    renderer = node.kind_of?(MDElement) ? "to_html_#{node.node_type}" : "to_html"
    # Unknown node types are silently left out of the output.
    next unless node.respond_to?(renderer)
    html = node.send(renderer)
    if html.nil?
      raise "Nil html created by method #{renderer}:\n#{html.inspect}\n"+
        " for object #{node.inspect[0,300]}"
    end
    if html.kind_of?(Array)
      rendered.concat(html)
    else
      rendered << html
    end
  end
  rendered
end
# Reference definitions produce no output of their own, in HTML or LaTeX:
# both methods return an empty array.
def to_html_ref_definition
  []
end

def to_latex_ref_definition
  []
end
end # HTML
end # out
end # MaRuKu

View file

@ -0,0 +1,563 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu
class MDDocument
# Encoding-related preamble lines used when the `latex_cjk` setting is on.
Latex_preamble_enc_cjk = [
  "\\usepackage[C40]{fontenc}",
  "\\usepackage[cjkjis]{ucs}",
  "\\usepackage[utf8x]{inputenc}"
].join("\n")

# Encoding-related preamble lines used otherwise.
Latex_preamble_enc_utf8 = [
  "\\usepackage{ucs}",
  "\\usepackage[utf8x]{inputenc}"
].join("\n")
def latex_require_package(p)
if not self.latex_required_packages.include? p
self.latex_required_packages.push p
end
end
# Render as a LaTeX fragment
# Render as a LaTeX fragment: the LaTeX of the children, without any
# document wrapper.
def to_latex()
  children_to_latex
end
=begin maruku_doc
Attribute: maruku_signature
Scope: document
Output: html, latex
Summary: Enables Maruku's signature.
Default: true
If false, Maruku does not append a signature to the
generated file.
=end
# Render as a complete LaTeX document
# Render as a complete LaTeX document (returns a string): an `article`
# preamble (encoding packages, packages registered through
# latex_require_package, fixed packages, optional user preamble) followed
# by the body between \begin{document} and \end{document}.
def to_latex_document
body = to_latex
# The signature is appended to the body when :maruku_signature is set.
if get_setting(:maruku_signature)
body += render_latex_signature
end
# One \usepackage line per registered package.
required =
self.latex_required_packages.map {|p|
"\\usepackage{#{p}}\n"
}.join
=begin maruku_doc
Attribute: latex_cjk
Scope: document
Output: latex
Summary: Support for CJK characters.
If the `latex_cjk` attribute is specified, then appropriate headers
are added to the LaTeX preamble to support Japanese fonts.
You have to have these fonts installed -- and this can be a pain.
If `latex_cjk` is specified, this is added to the preamble:
<?mrk puts "ciao" ?>
<?mrk md_codeblock(Maruku::MDDocument::Latex_preamble_enc_cjk) ?>
while the default is to add this:
<?mrk md_codeblock(Maruku::MDDocument::Latex_preamble_enc_utf8) ?>
=end
encoding = get_setting(:latex_cjk) ?
Latex_preamble_enc_cjk : Latex_preamble_enc_utf8
=begin maruku_doc
Attribute: latex_preamble
Scope: document
Output: latex
Summary: User-defined preamble.
If the `latex_preamble` attribute is specified, then its value
will be used as a custom preamble.
For example:
Title: My document
Latex preamble: preamble.tex
will produce:
...
\input{preamble.tex}
...
=end
# An \input line for the user's preamble file, or nothing.
user_preamble = (file = @doc.attributes[:latex_preamble]) ?
"\\input{#{file}}\n" : ""
"\\documentclass{article}
% Packages required to support encoding
#{encoding}
% Packages required by code
#{required}
% Packages always used
\\usepackage{hyperref}
\\usepackage{xspace}
\\usepackage[usenames,dvipsnames]{color}
\\hypersetup{colorlinks=true,urlcolor=blue}
#{user_preamble}
\\begin{document}
#{body}
\\end{document}
"
end
# Returns the LaTeX for the "Created by Maruku <date>." footer: pushed to
# the bottom of the page, under a rule, in tiny type.
def render_latex_signature
  [
    "\\vfill",
    "\\hrule",
    "\\vspace{1.2mm}",
    "\\begin{tiny}",
    "Created by \\href{http://maruku.rubyforge.org}{Maruku} #{self.nice_date}.",
    "\\end{tiny}"
  ].join("\n")
end
end end
module MaRuKu; module Out; module Latex
# Horizontal rule with half an em of vertical space on each side.
def to_latex_hrule
  "\n\\vspace{.5em} \\hrule \\vspace{.5em}\n"
end

# Forced line break (note the trailing space).
def to_latex_linebreak
  "\\linebreak "
end
# A paragraph is its children's LaTeX followed by a blank line.
def to_latex_paragraph
  "#{children_to_latex}\n\n"
end
=begin maruku_doc
Title: Input format for colors
Output: latex, html
Related: code_background_color
Admissible formats:
green
#abc
#aabbcc
=end
# \color[named]{name}
# \color[rgb]{1,0.2,0.3}
# Returns a LaTeX color command for color spec +s+.
#
# "#rrggbb" and "#rgb" specs become "\<command>[rgb]{r,g,b}" with each
# component scaled to 0.0-1.0 and printed with two decimals; anything else
# is passed through as a color name: "\<command>{<s>}".
#
# command:: macro name to emit (default 'color').
def latex_color(s, command='color')
  # The scale is the largest value a component can take in each notation.
  scale =
    if s =~ /^\#(\w\w)(\w\w)(\w\w)$/ then 255.0
    elsif s =~ /^\#(\w)(\w)(\w)$/ then 15.0
    end
  return "\\#{command}{#{s}}" unless scale
  rgb = [$1, $2, $3].map { |digits| digits.hex / scale }
  "\\#{command}[rgb]{%0.2f,%0.2f,%0.2f}" % rgb
end
=begin maruku_doc
Attribute: code_show_spaces
Scope: global, document, element
If `true`, shows spaces and tabs in code blocks.
Example:
One space
Two spaces
Tab, space, tab
Tab, tab, tab and all is green!
{:code_show_spaces code_background_color=#ffeedd}
{:markdown}
That will produce:
One space
Two spaces
Tab, space, tab
Tab, tab, tab and all is green!
{:code_show_spaces code_background_color=#ffeedd}
=end
=begin maruku_doc
Attribute: latex_use_listings
Scope: document
Output: latex
Summary: Support for `listings` package.
Related: code_show_spaces, code_background_color, lang, code_lang
If the `latex_use_listings` attribute is specified, then
code blocks are rendered using the `listings` package.
Otherwise, a standard `verbatim` environment is used.
* If the `lang` attribute for the code block has been specified,
it gets passed to the `listings` package using the `lstset` macro.
The default lang for code blocks is specified through
the `code_lang` attribute.
\lstset{language=ruby}
Please refer to the documentation of the `listings` package for
supported languages.
If a language is not supported, the `listings` package will emit
a warning during the compilation. Just press enter and nothing
wrong will happen.
* If the `code_show_spaces` is specified, then spaces and tabs will
be shown using the macro:
\lstset{showspaces=true,showtabs=true}
* The background color is given by `code_background_color`.
=end
# Renders a code block. With the :latex_use_listings setting the
# `listings` package is registered and the code goes into a `lstlisting`
# environment preceded by \lstset commands (frame, visible spaces,
# background color, font, language); otherwise a plain `verbatim`
# environment is used.
def to_latex_code
  raw_code = self.raw_code
  unless get_setting(:latex_use_listings)
    return "\\begin{verbatim}#{raw_code}\\end{verbatim}\n"
  end
  @doc.latex_require_package('listings')
  show = get_setting(:code_show_spaces) ? 'true' : 'false'
  color = latex_color get_setting(:code_background_color)
  # Falls back to an empty language ('{}') when none is configured.
  lang = self.attributes[:lang] || @doc.attributes[:code_lang] || '{}'
  settings = "\\lstset{columns=fixed,frame=shadowbox}"
  settings += "\\lstset{showspaces=#{show},showtabs=#{show}}\n"
  settings += "\\lstset{backgroundcolor=#{color}}\n"
  settings += "\\lstset{basicstyle=\\ttfamily\\footnotesize}\n"
  settings += "\\lstset{language=#{lang}}\n"
  "#{settings}\n\\begin{lstlisting}\n#{raw_code}\n\\end{lstlisting}"
end
# Sectioning command for each header level; deeper levels use 'paragraph'.
TexHeaders = {
  1=>'section',
  2=>'subsection',
  3=>'subsubsection',
  4=>'paragraph'}

# Renders a header as a starred (unnumbered) sectioning command. The
# section number from section_number, when available, is prepended to the
# title. If the header has an :id attribute, a \hypertarget and a \label
# with that id are emitted as well.
def to_latex_header
  h = TexHeaders[self.level] || 'paragraph'
  title = children_to_latex
  if number = section_number
    title = number + title
  end
  if id = self.attributes[:id]
    # drop '#' at the beginning
    # (the original assigned the Array `[1,id.size]` here instead of
    # slicing the string, so the target and label were the array's text)
    id = id[1, id.size] if id[0,1] == '#'
    %{\\hypertarget{%s}{}\\%s*{{%s}}\\label{%s}\n\n} % [ id, h, title, id ]
  else
    %{\\%s*{%s}\n\n} % [ h, title]
  end
end
# Renders an unordered list as an `itemize` environment; a list carrying
# the :toc attribute is replaced by the document's table of contents.
def to_latex_ul
  return @doc.toc.to_latex if self.attributes[:toc]
  wrap_as_environment('itemize')
end
# Block quote: a `quote` environment.
def to_latex_quote
  wrap_as_environment('quote')
end

# Ordered list: an `enumerate` environment.
def to_latex_ol
  wrap_as_environment('enumerate')
end
# List item: "\item " followed by the children's LaTeX and a newline.
def to_latex_li
  "\\item " + "#{children_to_latex}" + "\n"
end

# Same output as to_latex_li.
def to_latex_li_span
  "\\item " + "#{children_to_latex}" + "\n"
end
# Strong emphasis: \textbf{...}.
def to_latex_strong
  "\\textbf{" + "#{children_to_latex}" + "}"
end

# Emphasis: \emph{...}.
def to_latex_emphasis
  "\\emph{" + "#{children_to_latex}" + "}"
end
# Wraps the children's LaTeX in a group that starts with +c+:
# "{<c> <children>}".
def wrap_as_span(c)
  "{" + "#{c}" + " " + "#{children_to_latex}" + "}"
end
# Wraps the children's LaTeX in a \begin{name} ... \end{name} environment.
# The '%' after \begin{...} comments out the line break that follows it.
def wrap_as_environment(name)
  opening = "\\begin{#{name}}%\n"
  closing = "\n\\end{#{name}}\n"
  opening + "#{children_to_latex}" + closing
end
# Characters copied to the output unchanged by latex_escape (ASCII letters).
# NOTE: the elements are Integers on Ruby 1.8 and one-character Strings on
# later versions, because that is what `?a` evaluates to.
SAFE_CHARS = Set.new((?a..?z).to_a + (?A..?Z).to_a)

# the ultimate escaping
# (is much better than using \verb)
#
# Returns +source+ with every byte made safe for LaTeX: a space becomes
# '~', ASCII letters are kept, and every other byte is written as
# "\char<decimal code>".
def latex_escape(source)
  s = ""
  source.each_byte do |b|
    # 32 is the space character. The original compared against the literal
    # `?\ ` (backslash + space), which stops parsing once the trailing
    # space is stripped, and yields a String instead of a byte on Ruby 1.9+.
    if b == 32
      s << '~'
    # Look the byte up both as an Integer and as a character so that the
    # letters are recognized whichever kind SAFE_CHARS holds.
    elsif SAFE_CHARS.include?(b) || SAFE_CHARS.include?(b.chr)
      s << b.chr
    else
      s << ("\\char%d" % b)
    end
  end
  s
end
# Renders inline code in typewriter type, fully escaped, inside a
# \colorbox using the :code_background_color setting.
def to_latex_inline_code
  # Convert to printable latex chars
  escaped = latex_escape(self.raw_code)
  box = latex_color(get_setting(:code_background_color), 'colorbox')
  "#{box}{\\tt #{escaped}}"
end
# Renders a link whose visible text is the URL itself. A "mailto:" prefix
# is hidden from the visible text, which is then passed through
# latex_escape. URLs starting with '#' become an internal \hyperlink (with
# the '#' removed from the target); everything else becomes an \href.
def to_latex_immediate_link
  target = self.url
  label = latex_escape(target.gsub(/^mailto:/,''))
  if target[0,1] == '#'
    "\\hyperlink{#{target[1,target.size]}}{#{label}}"
  else
    "\\href{#{target}}{#{label}}"
  end
end
# Renders a link that carries its own URL. URLs starting with '#' become an
# internal \hyperlink (target without the '#'); everything else an \href.
# The link text is the LaTeX rendering of the children.
def to_latex_im_link
  target = self.url
  if target[0,1] == '#'
    "\\hyperlink{#{target[1,target.size]}}{#{children_to_latex}}"
  else
    "\\href{#{target}}{#{children_to_latex}}"
  end
end
# Renders a reference-style link by looking up +ref_id+ in the document's
# refs. When the reference is unknown, a message is written to $stderr and
# only the rendered children are returned. Otherwise the :url of the
# reference is rendered as a \hyperlink (when it starts with '#') or as
# an \href.
def to_latex_link
  key = self.ref_id
  entry = @doc.refs[key]
  unless entry
    $stderr.puts "Could not find id = '#{key}'"
    return children_to_latex
  end
  target = entry[:url]
  if target[0,1] == '#'
    "\\hyperlink{#{target[1,target.size]}}{#{children_to_latex}}"
  else
    "\\href{#{target}}{#{children_to_latex}}"
  end
end
# Renders an e-mail address as a mailto: \href. The visible text is the
# address passed through latex_escape; the target uses the raw address.
def to_latex_email_address
  address = self.email
  visible = latex_escape(address)
  "\\href{mailto:#{address}}{#{visible}}"
end
# Renders a table as a LaTeX tabular environment.
#
# @children is a flat list of cells: the first align.size cells form the
# header row and the rest are consumed in groups of the same size. Column
# specifiers (c, l, r) come from +align+ and are joined with '|'. An
# \hline separates the header from the body, and a blank line follows the
# environment so that the table sits in its own paragraph.
def to_latex_table
  column_alignment = self.align
  width = column_alignment.size
  header_cells = @children.slice(0, width)
  body_rows = []
  offset = width
  until offset >= @children.size
    body_rows << @children.slice(offset, width)
    offset += width
  end
  letters = {:center=>'c',:left=>'l',:right=>'r'}
  spec = column_alignment.map { |a| letters[a] }.join('|')
  parts = []
  parts << "\\begin{tabular}{#{spec}}\n"
  parts << array_to_latex(header_cells, '&') + "\\\\" + "\n"
  parts << "\\hline \n"
  body_rows.each do |row|
    parts << array_to_latex(row, '&') + "\\\\" + "\n"
  end
  parts << "\\end{tabular}"
  # puts table in its own paragraph
  parts << "\n\n"
  parts.join
end
# A header cell renders as the LaTeX of its children, with no decoration.
def to_latex_head_cell
  children_to_latex
end

# A body cell renders as the LaTeX of its children, with no decoration.
def to_latex_cell
  children_to_latex
end
# Renders a footnote reference as an inline \footnote{...} holding the
# stripped LaTeX rendering of the footnote found under +footnote_id+ in
# the document.
#
# When the footnote is unknown, a message is written to $stderr and an
# empty string is returned. Returning a String (and not the nil that
# $stderr.puts yields) matters because array_to_latex raises on nil.
def to_latex_footnote_reference
  id = self.footnote_id
  f = @doc.footnotes[id]
  if f
    "\\footnote{#{f.children_to_latex.strip}} "
  else
    $stderr.puts "Could not find footnote '#{id}'"
    ""
  end
end
# Raw HTML has no LaTeX counterpart here: it is dropped from the output.
# (A visible '{\bf Raw HTML removed in latex version }' marker was once
# considered and left disabled.)
def to_latex_raw_html
  return ""
end
## Definition lists ###

# Renders a definition list as a LaTeX description environment wrapped
# around the rendered children.
def to_latex_definition_list
  ["\\begin{description}\n", children_to_latex, "\\end{description}\n"].join
end
# Renders one entry of a definition list: every term becomes an
# \item[...] label, followed by the rendering of every definition.
def to_latex_definition
  term_nodes = self.terms
  definition_nodes = self.definitions
  labels = term_nodes.map { |t| "\n\\item[#{t.children_to_latex}] " }
  bodies = definition_nodes.map { |d| "#{d.children_to_latex} \n" }
  (labels + bodies).join
end
# Abbreviations get no special LaTeX markup: only the children are rendered.
def to_latex_abbr
  return children_to_latex
end
# Renders a reference-style image. Images are not supported by the LaTeX
# output yet, so this always returns an empty string:
#
# * unknown reference: reports the problem through maruku_error, listing
#   the reference ids that do exist in the document;
# * known reference: writes a "not supported" notice with the image URL
#   to $stderr.
def to_latex_image
  id = self.ref_id
  ref = @doc.refs[id]
  if not ref
    # The document lives in @doc; reading any other instance variable here
    # would yield nil and turn this error report into a NoMethodError.
    maruku_error "Could not find ref #{id.inspect} for image.\n"+
      "Available are: #{@doc.refs.keys.inspect}"
    ""
  else
    url = ref[:url]
    $stderr.puts "Images not supported yet (#{url})"
    # "{\\bf Images not supported yet (#{latex_escape(url)})}"
    ""
  end
end
# Convert each child to LaTeX and concatenate the results.
def children_to_latex
  kids = @children
  array_to_latex(kids)
end
# Renders every element of +array+ to LaTeX and joins the pieces with
# +join_char+.
#
# MDElement nodes are rendered through "to_latex_<node_type>"; anything
# else through "to_latex". Elements that do not respond to the chosen
# method are skipped silently. A renderer may return an Array, whose items
# are spliced into the result. A renderer returning nil is a bug and raises.
def array_to_latex(array, join_char='')
  pieces = []
  array.each do |node|
    renderer = if node.kind_of?(MDElement)
      "to_latex_#{node.node_type}"
    else
      "to_latex"
    end
    # no renderer available: leave the element out of the output
    next unless node.respond_to?(renderer)
    rendered = node.send(renderer)
    raise "Nil html for #{node.inspect} created with method #{renderer}" if rendered.nil?
    if rendered.kind_of?(Array)
      pieces = pieces + rendered
    else
      pieces << rendered
    end
  end
  # (A disabled experiment once appended "\\ " after a trailing command
  # when the next piece started with a space.)
  pieces.join(join_char)
end
end end end # MaRuKu::Out::Latex

View file

@ -0,0 +1,367 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'rexml/document'
module MaRuKu; module Out; module Latex
include REXML
# Renders an HTML entity as LaTeX using ENTITY_TABLE (built on first use).
#
# * Unknown entity: reports through maruku_error and returns "".
# * Known entity: registers each LaTeX package the entry lists with the
#   document, then returns the replacement followed by "{}".
# * Known entity without a replacement string: tells the user and returns
#   the entity name unchanged.
def to_latex_entity
  MaRuKu::Out::Latex.need_entity_table
  name = self.entity_name
  entry = ENTITY_TABLE[name]
  unless entry
    maruku_error "I don't know how to translate entity '#{name}' "+
      "to LaTeX."
    return ""
  end
  latex = entry.latex_string
  entry.latex_packages.each { |pkg| @doc.latex_require_package pkg }
  unless latex
    tell_user "Cannot translate entity #{name.inspect} to LaTeX."
    return name
  end
  latex + "{}"
end
# One row of the entity table: an HTML entity (numeric code and name)
# together with its LaTeX replacement and the LaTeX packages it needs.
class LatexEntity
  # Numeric character reference of the entity. Declared as Integer rather
  # than Fixnum: Fixnum values are Integers on old Rubies, and the Fixnum
  # constant no longer exists on current ones.
  safe_attr_accessor :html_num, Integer
  # Entity name, e.g. 'amp'
  safe_attr_accessor :html_entity, String
  # LaTeX source that replaces the entity
  safe_attr_accessor :latex_string, String
  # Names of the LaTeX packages required by latex_string
  safe_attr_accessor :latex_packages, Array
end
# Makes sure ENTITY_TABLE has been filled; the table is built only while
# it is still empty.
def Latex.need_entity_table
  return unless ENTITY_TABLE.empty?
  Latex.init_entity_table
end
# Fills ENTITY_TABLE from XML_TABLE.
#
# Each <char> element becomes one LatexEntity, stored twice: under its
# numeric code (Integer) and under its entity name (String). The
# @DOUBLEQUOT, @QUOT, @GT, @LT and @AMP placeholders found in convertTo
# are expanded to the characters they stand for.
def Latex.init_entity_table
  placeholders = [
    [/@DOUBLEQUOT/, '"'],
    [/@QUOT/, "'"],
    [/@GT/, ">"],
    [/@LT/, "<"],
    [/@AMP/, "&"]
  ]
  table = Document.new XML_TABLE
  table.elements.each("//char") do |c|
    attrs = c.attributes
    num = attrs['num'].to_i
    name = attrs['name']
    package = attrs['package']
    convert = attrs['convertTo']
    placeholders.each { |pattern, text| convert.gsub!(pattern, text) }
    convert.freeze

    entity = LatexEntity.new
    entity.html_num = num
    entity.html_entity = name
    entity.latex_string = convert
    # the package attribute is a whitespace-separated list and is optional
    entity.latex_packages = package ? package.split : []

    ENTITY_TABLE[num] = entity
    ENTITY_TABLE[name] = entity
  end
end
# Lookup table from HTML entity (numeric code or name) to LatexEntity.
# It starts empty and is filled lazily from XML_TABLE.
ENTITY_TABLE = {}

# The following is a conversion chart for html elements, courtesy of
# text2html.
#
# Each <char> carries the numeric code (num), the entity name (name), the
# LaTeX replacement (convertTo) and, optionally, the LaTeX packages the
# replacement needs (package, whitespace-separated).
#
# Notes for maintainers:
# * This is a double-quoted Ruby string: every LaTeX backslash must be
#   written as \\, also inside XML comments (a bare \t is a TAB).
# * XML comments must not contain "--", so a commented-out entry cannot
#   carry a nested <!-- ... --> remark.
# * num must be the real code point of the named entity, because the
#   table is also indexed by number (lceil=8968, rceil=8969, lang=9001,
#   rang=9002, cap=8745, cup=8746).
XML_TABLE ="
<chars>
<char num='913' name='Alpha' convertTo='$A$' />
<char num='914' name='Beta' convertTo='$B$' />
<char num='915' name='Gamma' convertTo='$\\Gamma$' />
<char num='916' name='Delta' convertTo='$\\Delta$' />
<char num='917' name='Epsilon' convertTo='$E$' />
<char num='918' name='Zeta' convertTo='$Z$' />
<char num='919' name='Eta' convertTo='$H$' />
<char num='920' name='Theta' convertTo='$\\Theta$' />
<char num='921' name='Iota' convertTo='$I$' />
<char num='922' name='Kappa' convertTo='$K$' />
<char num='923' name='Lambda' convertTo='$\\Lambda$' />
<char num='924' name='Mu' convertTo='$M$' />
<char num='925' name='Nu' convertTo='$N$' />
<char num='926' name='Xi' convertTo='$\\Xi$' />
<char num='927' name='Omicron' convertTo='$O$' />
<char num='928' name='Pi' convertTo='$\\Pi$' />
<char num='929' name='Rho' convertTo='$P$' />
<char num='931' name='Sigma' convertTo='$\\Sigma$' />
<char num='932' name='Tau' convertTo='$T$' />
<char num='933' name='Upsilon' convertTo='$Y$' />
<char num='934' name='Phi' convertTo='$\\Phi$' />
<char num='935' name='Chi' convertTo='$X$' />
<char num='936' name='Psi' convertTo='$\\Psi$' />
<char num='937' name='Omega' convertTo='$\\Omega$' />
<char num='945' name='alpha' convertTo='$\\alpha$' />
<char num='946' name='beta' convertTo='$\\beta$' />
<char num='947' name='gamma' convertTo='$\\gamma$' />
<char num='948' name='delta' convertTo='$\\delta$' />
<char num='949' name='epsilon' convertTo='$\\epsilon$' />
<char num='950' name='zeta' convertTo='$\\zeta$' />
<char num='951' name='eta' convertTo='$\\eta$' />
<char num='952' name='theta' convertTo='$\\theta$' />
<char num='953' name='iota' convertTo='$\\iota$' />
<char num='954' name='kappa' convertTo='$\\kappa$' />
<char num='955' name='lambda' convertTo='$\\lambda$' />
<char num='956' name='mu' convertTo='$\\mu$' />
<char num='957' name='nu' convertTo='$\\nu$' />
<char num='958' name='xi' convertTo='$\\xi$' />
<char num='959' name='omicron' convertTo='$o$' />
<char num='960' name='pi' convertTo='$\\pi$' />
<char num='961' name='rho' convertTo='$\\rho$' />
<char num='963' name='sigma' convertTo='$\\sigma$' />
<char num='964' name='tau' convertTo='$\\tau$' />
<char num='965' name='upsilon' convertTo='$\\upsilon$' />
<char num='966' name='phi' convertTo='$\\phi$' />
<char num='967' name='chi' convertTo='$\\chi$' />
<char num='968' name='psi' convertTo='$\\psi$' />
<char num='969' name='omega' convertTo='$\\omega$' />
<char num='962' name='sigmaf' convertTo='$\\varsigma$' />
<char num='977' name='thetasym' convertTo='$\\vartheta$' />
<char num='982' name='piv' convertTo='$\\varpi$' />
<char num='8230' name='hellip' convertTo='\\ldots' />
<char num='8242' name='prime' convertTo='$\\prime$' />
<char num='8254' name='oline' convertTo='-' />
<char num='8260' name='frasl' convertTo='/' />
<char num='8472' name='weierp' convertTo='$\\wp$' />
<char num='8465' name='image' convertTo='$\\Im$' />
<char num='8476' name='real' convertTo='$\\Re$' />
<char num='8501' name='alefsym' convertTo='$\\aleph$' />
<char num='8226' name='bull' convertTo='$\\bullet$' />
<char num='8482' name='trade' convertTo='$^{\\rm TM}$' /> <!-- \\texttrademark -->
<char num='8592' name='larr' convertTo='$\\leftarrow$' />
<char num='8594' name='rarr' convertTo='$\\rightarrow$' />
<char num='8593' name='uarr' convertTo='$\\uparrow$' />
<char num='8595' name='darr' convertTo='$\\downarrow$' />
<char num='8596' name='harr' convertTo='$\\leftrightarrow$' />
<char num='8629' name='crarr' convertTo='$\\hookleftarrow$' />
<char num='8657' name='uArr' convertTo='$\\Uparrow$' />
<char num='8659' name='dArr' convertTo='$\\Downarrow$' />
<char num='8656' name='lArr' convertTo='$\\Leftarrow$' />
<char num='8658' name='rArr' convertTo='$\\Rightarrow$' />
<char num='8660' name='hArr' convertTo='$\\Leftrightarrow$' />
<char num='8704' name='forall' convertTo='$\\forall$' />
<char num='8706' name='part' convertTo='$\\partial$' />
<char num='8707' name='exist' convertTo='$\\exists$' />
<char num='8709' name='empty' convertTo='$\\emptyset$' />
<char num='8711' name='nabla' convertTo='$\\nabla$' />
<char num='8712' name='isin' convertTo='$\\in$' />
<char num='8715' name='ni' convertTo='$\\ni$' />
<char num='8713' name='notin' convertTo='$\\notin$' />
<char num='8721' name='sum' convertTo='$\\sum$' />
<char num='8719' name='prod' convertTo='$\\prod$' />
<char num='8722' name='minus' convertTo='$-$' />
<char num='8727' name='lowast' convertTo='$\\ast$' />
<char num='8730' name='radic' convertTo='$\\surd$' />
<char num='8733' name='prop' convertTo='$\\propto$' />
<char num='8734' name='infin' convertTo='$\\infty$' />
<char num='8736' name='ang' convertTo='$\\angle$' />
<char num='8743' name='and' convertTo='$\\wedge$' />
<char num='8744' name='or' convertTo='$\\vee$' />
<char num='8745' name='cap' convertTo='$\\cap$' />
<char num='8746' name='cup' convertTo='$\\cup$' />
<char num='8747' name='int' convertTo='$\\int$' />
<char num='8756' name='there4' convertTo='$\\therefore$' package='amssymb' /> <!-- only AMS -->
<char num='8764' name='sim' convertTo='$\\sim$' />
<char num='8776' name='asymp' convertTo='$\\approx$' />
<char num='8773' name='cong' convertTo='$\\cong$' />
<char num='8800' name='ne' convertTo='$\\neq$' />
<char num='8801' name='equiv' convertTo='$\\equiv$' />
<char num='8804' name='le' convertTo='$\\leq$' />
<char num='8805' name='ge' convertTo='$\\geq$' />
<char num='8834' name='sub' convertTo='$\\subset$' />
<char num='8835' name='sup' convertTo='$\\supset$' />
<!-- <char num='8838' name='sube' convertTo='$\\subseteq$' />-->
<char num='8839' name='supe' convertTo='$\\supseteq$' />
<!-- <char num='8836' name='nsub' convertTo='$\\nsubset$' /> (only AMS) -->
<char num='8853' name='oplus' convertTo='$\\oplus$' />
<char num='8855' name='otimes' convertTo='$\\otimes$' />
<char num='8869' name='perp' convertTo='$\\perp$' />
<char num='8901' name='sdot' convertTo='$\\cdot$' />
<char num='8968' name='lceil' convertTo='$\\lceil$' />
<char num='8969' name='rceil' convertTo='$\\rceil$' />
<char num='8970' name='lfloor' convertTo='$\\lfloor$' />
<char num='8971' name='rfloor' convertTo='$\\rfloor$' />
<char num='9001' name='lang' convertTo='$\\langle$' />
<char num='9002' name='rang' convertTo='$\\rangle$' />
<char num='9674' name='loz' convertTo='$\\lozenge$' package='amssymb' /> <!-- only AMS -->
<char num='9824' name='spades' convertTo='$\\spadesuit$' />
<char num='9827' name='clubs' convertTo='$\\clubsuit$' />
<char num='9829' name='hearts' convertTo='$\\heartsuit$' />
<char num='9830' name='diams' convertTo='$\\diamondsuit$' />
<char num='38' name='amp' convertTo='\\@AMP' />
<!-- <char num='34' name='quot' convertTo='\\@DOUBLEQUOT' /> XXX -->
<char num='34' name='quot' convertTo='\"' />
<char num='39' name='apos' convertTo=\"'\" />
<char num='169' name='copy' convertTo='\\copyright' />
<char num='60' name='lt' convertTo='$@LT$' />
<char num='62' name='gt' convertTo='$@GT$' />
<char num='338' name='OElig' convertTo='\\OE' />
<char num='339' name='oelig' convertTo='\\oe' />
<char num='352' name='Scaron' convertTo='\\v{S}' />
<char num='353' name='scaron' convertTo='\\v{s}' />
<char num='376' name='Yuml' convertTo='\\\"Y' />
<char num='710' name='circ' convertTo='\\textasciicircum' />
<char num='732' name='tilde' convertTo='\\textasciitilde' />
<char num='8211' name='ndash' convertTo='--' />
<char num='8212' name='mdash' convertTo='---' />
<char num='8216' name='lsquo' convertTo='`' />
<char num='8217' name='rsquo' convertTo=\"'\" /> <!-- XXXX -->
<char num='8220' name='ldquo' convertTo='``' />
<char num='8221' name='rdquo' convertTo=\"''\" /> <!-- XXXX -->
<char num='8224' name='dagger' convertTo='\\dag' />
<char num='8225' name='Dagger' convertTo='\\ddag' />
<char num='8240' name='permil' convertTo='\\permil' package='wasysym' /> <!-- wasysym package -->
<char num='8364' name='euro' convertTo='\\euro' package='eurosym' /> <!-- eurosym package -->
<char num='8249' name='lsaquo' convertTo='\\guilsinglleft' package='aeguill'/>
<char num='8250' name='rsaquo' convertTo='\\guilsinglright' package='aeguill' />
<!-- <char num='160' name='nbsp' convertTo='\\nolinebreak' />-->
<char num='160' name='nbsp' convertTo='~' />
<char num='161' name='iexcl' convertTo='\\textexclamdown' />
<char num='163' name='pound' convertTo='\\pounds' />
<char num='164' name='curren' convertTo='\\currency' package='wasysym' /> <!-- wasysym package -->
<char num='165' name='yen' convertTo='\\textyen' package='textcomp'/> <!-- textcomp -->
<char num='166' name='brvbar' convertTo='\\brokenvert' package='wasysym' /> <!-- wasysym -->
<char num='167' name='sect' convertTo='\\S' />
<char num='171' name='laquo' convertTo='\\guillemotleft' package='aeguill'/>
<char num='187' name='raquo' convertTo='\\guillemotright' package='aeguill'/>
<char num='174' name='reg' convertTo='\\textregistered' />
<char num='170' name='ordf' convertTo='\\textordfeminine' />
<char num='172' name='not' convertTo='$\\neg$' />
<!-- <char num='176' name='deg' convertTo='$\\degree$' /> (mathabx) -->
<char num='176' name='deg' convertTo='\\textdegree' package='textcomp'/>
<char num='177' name='plusmn' convertTo='$\\pm$' />
<char num='180' name='acute' convertTo='@QUOT' />
<char num='181' name='micro' convertTo='$\\mu$' />
<char num='182' name='para' convertTo='\\P' />
<char num='183' name='middot' convertTo='$\\cdot$' />
<char num='186' name='ordm' convertTo='\\textordmasculine' />
<char num='162' name='cent' convertTo='\\cent' package='wasysym' />
<char num='185' name='sup1' convertTo='$^1$' />
<char num='178' name='sup2' convertTo='$^2$' />
<char num='179' name='sup3' convertTo='$^3$' />
<char num='189' name='frac12' convertTo='$\\frac{1}{2}$' />
<char num='188' name='frac14' convertTo='$\\frac{1}{4}$' />
<char num='190' name='frac34' convertTo='$\\frac{3}{4}$' />
<char num='192' name='Agrave' convertTo='\\`A' />
<char num='193' name='Aacute' convertTo='\\@QUOTA' />
<char num='194' name='Acirc' convertTo='\\^A' />
<char num='195' name='Atilde' convertTo='\\~A' />
<char num='196' name='Auml' convertTo='\\@DOUBLEQUOTA' />
<char num='197' name='Aring' convertTo='\\AA' />
<char num='198' name='AElig' convertTo='\\AE' />
<char num='199' name='Ccedil' convertTo='\\c{C}' />
<char num='200' name='Egrave' convertTo='\\`E' />
<char num='201' name='Eacute' convertTo='\\@QUOTE' />
<char num='202' name='Ecirc' convertTo='\\^E' />
<char num='203' name='Euml' convertTo='\\@DOUBLEQUOTE' />
<char num='204' name='Igrave' convertTo='\\`I' />
<char num='205' name='Iacute' convertTo='\\@QUOTI' />
<char num='206' name='Icirc' convertTo='\\^I' />
<char num='207' name='Iuml' convertTo='\\\"I' />
<char num='208' name='ETH' convertTo='$\\eth$' package='amssymb' /> <!-- AMS -->
<char num='209' name='Ntilde' convertTo='\\~N' />
<char num='210' name='Ograve' convertTo='\\`O' />
<char num='211' name='Oacute' convertTo='\\@QUOT O' />
<char num='212' name='Ocirc' convertTo='\\^O' />
<char num='213' name='Otilde' convertTo='\\~O' />
<char num='214' name='Ouml' convertTo='\\@DOUBLEQUOTO' />
<char num='215' name='times' convertTo='$\\times$' />
<char num='216' name='Oslash' convertTo='\\O' />
<char num='217' name='Ugrave' convertTo='\\`U' />
<char num='218' name='Uacute' convertTo='\\@QUOTU' />
<char num='219' name='Ucirc' convertTo='\\^U' />
<char num='220' name='Uuml' convertTo='\\@DOUBLEQUOTU' />
<char num='221' name='Yacute' convertTo='\\@QUOTY' />
<char num='223' name='szlig' convertTo='\\ss' />
<char num='224' name='agrave' convertTo='\\`a' />
<char num='225' name='aacute' convertTo='\\@QUOTa' />
<char num='226' name='acirc' convertTo='\\^a' />
<char num='227' name='atilde' convertTo='\\~a' />
<char num='228' name='auml' convertTo='\\@DOUBLEQUOTa' />
<char num='229' name='aring' convertTo='\\aa' />
<char num='230' name='aelig' convertTo='\\ae' />
<char num='231' name='ccedil' convertTo='\\c{c}' />
<char num='232' name='egrave' convertTo='\\`e' />
<char num='233' name='eacute' convertTo='\\@QUOTe' />
<char num='234' name='ecirc' convertTo='\\^e' />
<char num='235' name='euml' convertTo='\\@DOUBLEQUOTe' />
<char num='236' name='igrave' convertTo='\\`i' />
<char num='237' name='iacute' convertTo='\\@QUOTi' />
<char num='238' name='icirc' convertTo='\\^i' />
<char num='239' name='iuml' convertTo='\\@DOUBLEQUOTi' />
<char num='240' name='eth' convertTo='$\\eth$' package='amssymb'/> <!-- -->
<char num='241' name='ntilde' convertTo='\\~n' />
<char num='242' name='ograve' convertTo='\\`o' />
<char num='243' name='oacute' convertTo='\\@QUOTo' />
<char num='244' name='ocirc' convertTo='\\^o' />
<char num='245' name='otilde' convertTo='\\~o' />
<char num='246' name='ouml' convertTo='\\@DOUBLEQUOTo' />
<!-- <char num='247' name='divide' convertTo='$\\divide$' /> -->
<char num='248' name='oslash' convertTo='\\o' />
<char num='249' name='ugrave' convertTo='\\`u' />
<char num='250' name='uacute' convertTo='\\@QUOTu' />
<char num='251' name='ucirc' convertTo='\\^u' />
<char num='252' name='uuml' convertTo='\\@DOUBLEQUOTu' />
<char num='253' name='yacute' convertTo='\\@QUOTy' />
<char num='255' name='yuml' convertTo='\\@DOUBLEQUOTy' />
<char num='222' name='THORN' convertTo='\\Thorn' package='wasysym' />
<char num='254' name='thorn' convertTo='\\thorn' package='wasysym' />
</chars>"
end end end

View file

@ -0,0 +1,64 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
class String

  # These are TeX's special characters, as byte values. Each one is
  # escaped by putting a backslash in front of it.
  LATEX_ADD_SLASH = '{}$&#_%'.unpack('C*')

  # These, we transform to {\tt \char<ascii code>} (byte values).
  LATEX_TO_CHARCODE = '^~><'.unpack('C*')

  # Returns a copy of +s+ in which TeX's special characters are escaped:
  #
  # * ^ ~ > <        become {\tt \char<code>}
  # * { } $ & # _ %  get a leading backslash
  # * a backslash    becomes $\backslash$
  #
  # Only these ASCII characters are touched, so multi-byte text passes
  # through unchanged. The byte value of each match is taken with unpack,
  # which behaves the same on Ruby 1.8 (where character literals are
  # integers) and on later versions (where they are strings).
  def escape_to_latex(s)
    s.gsub(/[\^~><{}\$&\#_%\\]/) do |ch|
      b = ch.unpack('C').first
      if LATEX_TO_CHARCODE.include? b
        "{\\tt \\char#{b}}"
      elsif LATEX_ADD_SLASH.include? b
        "\\" + ch
      else
        # only the backslash is left;
        # there is no backslash in cmr10 fonts
        "$\\backslash$"
      end
    end
  end

  # escapes special characters
  #
  # Returns the receiver escaped for LaTeX, with the OtherGoodies
  # substitutions applied afterwards.
  def to_latex
    s = escape_to_latex(self)
    OtherGoodies.each do |k, v|
      s.gsub!(k, v)
    end
    s
  end

  # other things that are good on the eyes
  OtherGoodies = {
    /(\s)LaTeX/ => '\1\\LaTeX\\xspace ', # XXX not if already \LaTeX
    # 'HTML' => '\\textsc{html}\\xspace ',
    # 'PDF' => '\\textsc{pdf}\\xspace '
  }

end

View file

@ -0,0 +1,164 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
class String
  # XXX: markdown escaping
  #
  # Markdown rendering of a plain string: the text itself, unescaped.
  # The optional context argument is accepted and ignored.
  def to_md(c=nil)
    return to_s
  end

  # Splits on whitespace and gives every word one trailing space:
  #
  #   " andrea censi " => ["andrea ", "censi "]
  def mysplit
    split.collect { |word| word + " " }
  end
end
module MaRuKu; module Out; module Markdown
# Line length used for wrapping when the context has no :line_length.
DefaultLineLength = 40

# Generic Markdown rendering of an element: the Markdown of its children.
# +context+ is handed down to the children unchanged.
def to_md(context={})
  return children_to_md(context)
end
# Renders a paragraph: the children wrapped to the context's :line_length
# (DefaultLineLength when unset), followed by an extra newline.
def to_md_paragraph(context)
  width = context[:line_length] || DefaultLineLength
  wrapped = wrap(@children, width, context)
  wrapped + "\n"
end
# Renders a span-level list item: the children are wrapped four columns
# narrower than the line length, indented through add_tabs, and the first
# character of the result is overwritten with the '*' bullet.
def to_md_li_span(context)
  width = (context[:line_length] || DefaultLineLength) - 2
  wrapped = wrap(@children, width-2, context)
  item = add_tabs(wrapped, 1, ' ')
  item[0] = ?*
  item + "\n"
end
# Renders an abbreviation definition line: "*[ABBR]: expansion".
def to_md_abbr_def(context)
  short = self.abbr
  expansion = self.text
  "*[#{short}]: #{expansion}\n"
end
# Renders an ordered list. Each item is wrapped, indented through
# add_tabs, and the first four characters of the result are replaced by
# the item number ("1. ", "2. ", ...). A blank line ends the list.
def to_md_ol(context)
  width = (context[:line_length] || DefaultLineLength) - 2
  items = []
  self.children.each_with_index do |li, i|
    entry = add_tabs(wrap(li.children, width-2, context), 1, ' ') + "\n"
    entry[0,4] = "#{i+1}. "[0,4]
    items << entry
  end
  items.join + "\n"
end
# Renders an unordered list. Each item is wrapped, passed through
# add_indent, and the first character of the result is replaced by the
# '-' bullet. A blank line ends the list.
def to_md_ul(context)
  width = (context[:line_length] || DefaultLineLength) - 2
  items = self.children.map do |li|
    entry = add_indent(wrap(li.children, width-2, context))
    entry[0,1] = "-"
    entry
  end
  items.join + "\n"
end
# Returns a copy of +s+ with +char+ prepended to every line.
#
# A trailing newline in +s+ is kept (split drops it, so it is put back
# explicitly). The argument itself is never modified.
def add_indent(s,char=" ")
  t = s.split("\n").map{|x| char+x }.join("\n")
  t << "\n" if s[-1,1] == "\n"
  t
end
# Convert each child to Markdown and concatenate the results.
def children_to_md(context)
  kids = @children
  array_to_md(kids, context)
end
# Word-wraps the Markdown rendering of +array+ at +line_length+ columns.
#
# Strings are split into words (each keeping one trailing space); other
# elements contribute whatever their to_md returns, as one or more
# unbreakable pieces. A :linebreak element ends the current line with a
# trailing space before the newline. The result always ends in a newline.
def wrap(array, line_length, context)
  result = ""
  current = ""
  array.each do |child|
    if child.kind_of?(MDElement) && child.node_type == :linebreak
      # forced break: flush what has been collected so far
      result << current.strip << " \n"
      current = ""
    else
      fragments = if child.kind_of? String
        child.to_md.mysplit
      else
        [child.to_md(context)].flatten
      end
      fragments.each do |fragment|
        # start a new line when this piece would overflow the current one
        if fragment.size + current.size > line_length
          result << current.strip << "\n"
          current = ""
        end
        current << fragment
      end
    end
  end
  result << current.strip << "\n" unless current.size == 0
  result << "\n" unless result[-1,1] == "\n"
  result
end
# Renders every element of +array+ to Markdown and joins the pieces with
# +join_char+.
#
# MDElement nodes are rendered through "to_md_<node_type>" when they
# respond to it; everything else (and every node without a specific
# renderer) goes through the generic "to_md". A renderer may return an
# Array, whose items are spliced into the result. A renderer returning
# nil is a bug and raises.
def array_to_md(array, context, join_char='')
  pieces = []
  array.each do |node|
    renderer = node.kind_of?(MDElement) ? "to_md_#{node.node_type}" : "to_md"
    # fall back to the generic renderer
    renderer = 'to_md' unless node.respond_to?(renderer)
    rendered = node.send(renderer, context)
    raise "Nil md for #{node.inspect} created with method #{renderer}" if rendered.nil?
    if rendered.kind_of?(Array)
      pieces = pieces + rendered
    else
      pieces << rendered
    end
  end
  pieces.join(join_char)
end
end end end
module MaRuKu
  class MDDocument
    # The to_md visible at this point stays reachable as old_md.
    alias old_md to_md

    # Markdown rendering of the whole document. For now this only
    # delegates to the previous implementation; it is the place to hook
    # document-level post-processing.
    def to_md(context={})
      old_md(context)
    end
  end
end

View file

@ -0,0 +1,53 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu
  class MDElement

    # Strips all formatting from the string
    def to_s
      children_to_s
    end

    # Concatenation of the children (each converted with to_s).
    def children_to_s
      @children.join
    end

    # Generate an id for headers. Assumes @children is set.
    #
    # The plain text of the element is turned into an identifier: spaces
    # become underscores, letters are downcased and everything that is
    # not a word character is dropped. When nothing is left, a fresh
    # "id<N>" is produced from the global counter $uid.
    def generate_id
      slug = children_to_s.gsub(/ /,'_').downcase.gsub(/[^\w_]/,'').strip
      return slug unless slug.size == 0
      $uid = ($uid || 0) + 1
      "id#{$uid}"
    end

  end
end

View file

@ -0,0 +1,184 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
# Boring stuff with strings.
module MaRuKu; module Strings
# Prefixes every line of +s+ with +n+ copies of +char+ (a tab by default).
# Lines are joined back with "\n"; a trailing newline is not preserved.
def add_tabs(s,n=1,char="\t")
  indented = []
  s.split("\n").each { |row| indented << (char * n) + row }
  indented.join("\n")
end
# Number of columns a tab character counts for.
TabSize = 4

# Splits +s+ into its lines (without the newline characters).
def split_lines(s)
  separator = "\n"
  s.split(separator)
end
# This parses email headers. Returns an hash.
#
# +hash[:data]+ is the message.
#
# Keys are downcased, space becomes underscore, converted to symbols.
#
#   My key: true
#
# becomes:
#
#   {:my_key => true}
#
# Headers are only recognized when they start at the very beginning of
# +s+ and are followed by an empty line; otherwise the whole input is
# returned as +:data+.
def parse_email_headers(s)
  keys={}
  match = (s =~ /((\w[\w\s]+: .*\n)+)\n/)
  if match != 0
    keys[:data] = s
  else
    keys[:data] = $'
    headers = $1
    headers.split("\n").each do |l|
      # Split at the first colon only, so that values which contain
      # colons themselves (URLs, times, ...) are kept whole.
      k, v = l.split(':', 2)
      k, v = normalize_key_and_value(k, v)
      k = k.to_sym
      keys[k] = v
    end
  end
  keys
end
# Normalizes one header pair and returns [key, value].
#
# The key is stripped, downcased and its spaces become underscores (it is
# returned as a String). The value is stripped; a missing value counts as
# true, 'yes'/'true' become true and 'no'/'false' become false (compared
# case-insensitively).
def normalize_key_and_value(k,v)
  value = v ? v.strip : true # no value defaults to true
  # check synonyms
  word = value.to_s.downcase
  value = true if ['yes','true'].include?(word)
  value = false if ['no','false'].include?(word)
  key = k.strip.downcase.gsub(' ','_')
  return key, value
end
# Returns the number of leading spaces, considering that
# a tab counts as `TabSize` spaces.
#
# Only the run of spaces and tabs at the very start of +s+ is counted.
def number_of_leading_spaces(s)
  prefix = s[/\A[ \t]*/]
  prefix.count(' ') + prefix.count("\t") * TabSize
end
# This returns the position of the first real char in a list item
#
# For example:
#   '*Hello'      # => 1
#   '* Hello'     # => 2
#   ' * Hello'    # => 3
#   '1.Hello'     # => 2
#   ' 1. Hello'   # => 4
#
# The kind of list is taken from s.md_type (:ulist or :olist); for
# anything else the problem is reported through tell_user and 0 is
# returned.
def spaces_before_first_char(s)
  # advances pos while the character at pos matches pattern
  skip = lambda do |pos, pattern|
    pos += 1 while s[pos,1] =~ pattern
    pos
  end
  case s.md_type
  when :ulist
    # optional whitespace, the one-character indicator (+, -, *),
    # optional whitespace
    after_indicator = skip.call(0, /\s/) + 1
    skip.call(after_indicator, /\s/)
  when :olist
    # whitespace, digits, the dot, whitespace
    after_digits = skip.call(skip.call(0, /\s/), /\d/)
    skip.call(after_digits + 1, /\s/)
  else
    tell_user "BUG (my bad): '#{s}' is not a list"
    0
  end
end
# Counts the number of leading '#' in the string.
#
# The last character of the string is never counted, so a string made
# only of hashes yields one less than its length.
def num_leading_hashes(s)
  run = s[/\A#*/].size
  ceiling = [s.size - 1, 0].max
  [run, ceiling].min
end
# Strips initial and final hashes
#
# The leading hashes (as counted by num_leading_hashes) are removed, then
# trailing hashes and whitespace, then surrounding whitespace. The first
# remaining character is never removed by the trailing scan.
def strip_hashes(s)
  rest = s[num_leading_hashes(s), s.size]
  last = rest.size - 1
  last -= 1 until last <= 0 || rest[last,1] !~ /(#|\s)/
  rest[0, last + 1].strip
end
# Removes the quote marker: a '>' at the start of any line, together with
# at most one whitespace character after it.
def unquote(s)
  quote_marker = /^>\s?/
  s.gsub(quote_marker, '')
end
# Removes at most +n+ columns of leading indentation from +s+.
#
# A space counts for one column and a tab for TabSize columns. Scanning
# stops at the first character that is neither, or once +n+ columns have
# been consumed. Returns the rest of the string.
def strip_indent(s, n)
  i = 0
  while i < s.size && n>0
    c = s[i,1]
    if c == ' '
      n-=1;
    elsif c == "\t"
      n-=TabSize;
    else
      break
    end
    i+=1
  end
  # Take everything from i on. A length of s.size-1 would cut the last
  # character whenever no indentation was consumed (i == 0).
  s[i, s.size]
end
# Debugging aid: prints every element of +a+ on its own line, as
# "<prefix> (<1-based position>)# <element.inspect>".
def dbg_describe_ary(a, prefix='')
  a.each_with_index do |l, position|
    puts "#{prefix} (#{position + 1})# #{l.inspect}"
  end
end
# Tells whether line +l+ ends in a space right before a line end, which
# marks a forced line break. Returns the position of that space (truthy)
# or nil.
def force_linebreak?(l)
  trailing_space = / $/
  l =~ trailing_space
end
end
end

View file

@ -0,0 +1,165 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
# Adds "typed" attribute declarations to every class and module.
class Module
# Checking variant: declares a reader for +symbol+ and generates a writer
# that raises (with a descriptive message) unless the assigned value is
# kind_of? +klass+. The writer is produced as source text and compiled
# with module_eval, so +symbol+ and +klass+ are interpolated into the
# code when the accessor is declared, while the \#{...} parts are
# evaluated when the writer runs.
def safe_attr_accessor1(symbol, klass)
attr_reader symbol
code = <<-EOF
def #{symbol}=(val)
if not val.kind_of? #{klass}
s = "\nCould not assign an object of type \#{val.class} to #{symbol}.\n\n"
s += "Tried to assign object of class \#{val.class}:\n"+
"\#{val.inspect}\n"+
"to \#{self.class}::#{symbol} constrained to be of class #{klass}.\n"
raise s
end
@#{symbol} = val
end
EOF
module_eval code
end
# Unchecked variant: a plain attr_accessor; +klass+ is ignored.
def safe_attr_accessor2(symbol, klass)
attr_accessor symbol
end
# The unchecked variant is the one in use; point the alias at
# safe_attr_accessor1 to turn the class checks on.
alias safe_attr_accessor safe_attr_accessor2
end
module MaRuKu
# I did not want to have a class for each possible element.
# Instead I opted to have only the class "MDElement"
# that represents everything in the document (paragraphs, headers, etc).
#
# You can tell what it is by the variable `node_type`.
#
# In the instance-variable `children` there are the children. These
# can be of class 1) String or 2) MDElement.
#
# The @doc variable points to the document to which the MDElement
# belongs (which is an instance of Maruku, subclass of MDElement).
#
# Attributes are contained in the hash `attributes`.
# Keys are symbols (downcased, with spaces substituted by underscores)
#
# For example, if you write in the source document.
#
# Title: test document
# My property: value
#
# content content
#
# You can access `value` by writing:
#
# @doc.attributes[:my_property] # => 'value'
#
# from whichever MDElement in the hierarchy.
#
class MDElement
  # See helpers.rb for the list of allowed #node_type values
  safe_attr_accessor :node_type, Symbol

  # Children are either Strings or MDElement
  safe_attr_accessor :children, Array

  # An attribute list, may not be nil
  safe_attr_accessor :al, Array #Maruku::AttributeList

  # These are the processed attributes
  safe_attr_accessor :attributes, Hash

  # Reference of the document (which is of class Maruku)
  attr_accessor :doc

  # The meta hash the element was created with; used by == below.
  attr_accessor :meta_priv

  # Builds an element of the given node_type with the given children.
  #
  # Every key of +meta+ becomes a singleton reader/writer pair on this
  # instance (backed by an instance variable of the same name) and is
  # initialized with the corresponding value. +al+ is the attribute list;
  # an explicit nil is replaced by an empty AttributeList.
  def initialize(node_type=:unset, children=[], meta={},
                 al=MaRuKu::AttributeList.new )
    super();
    self.children = children
    self.node_type = node_type

    @attributes = {}

    meta.each do |symbol, value|
      accessors = "\ndef #{symbol}; @#{symbol}; end\ndef #{symbol}=(val); @#{symbol}=val; end"
      self.instance_eval accessors
      self.send "#{symbol}=", value
    end

    self.al = al || AttributeList.new

    self.meta_priv = meta
  end

  # Two elements are equal when the other one is an MDElement too and
  # node type, meta hash and children all match.
  def ==(o)
    return false unless o.kind_of?(MDElement)
    (self.node_type == o.node_type) &&
      (self.meta_priv == o.meta_priv) &&
      (self.children == o.children)
  end
end
# This represents the whole document and holds global data.
class MDDocument

  safe_attr_accessor :refs, Hash
  safe_attr_accessor :footnotes, Hash

  # This is an hash. The key might be nil.
  safe_attr_accessor :abbreviations, Hash

  # Attribute lists definition
  safe_attr_accessor :ald, Hash

  # The order in which footnotes are used. Contains the id.
  safe_attr_accessor :footnotes_order, Array

  safe_attr_accessor :latex_required_packages, Array

  # Creates an empty document (node type :document) that is its own @doc,
  # with every document-wide table initialized empty. When a source
  # string +s+ is given it is parsed right away with parse_doc.
  def initialize(s=nil)
    super(:document)
    @doc = self

    self.refs, self.footnotes, self.abbreviations, self.ald = {}, {}, {}, {}
    self.footnotes_order, self.latex_required_packages = [], []

    parse_doc(s) if s
  end
end
end # MaRuKu

View file

@ -0,0 +1,87 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
class String
  # Returns the same text as #inspect.
  # Both arguments are optional and ignored; they only let a String be
  # called with the same arguments as Array#inspect_more.
  def inspect_more(a=nil, b=nil)
    self.inspect
  end
end
class Object
  # Fallback for any object: returns the same text as #inspect.
  # Both arguments are optional and ignored.
  def inspect_more(a=nil, b=nil)
    self.inspect
  end
end
class Array
  # Renders the elements one by one and joins them with +join_string+.
  #
  # Strings are rendered with #inspect, MaRuKu::MDElement instances with
  # inspect(compact); any other element raises a RuntimeError.
  # The result is wrapped in square brackets unless +add_brackets+ is false.
  def inspect_more(compact, join_string, add_brackets=true)
    pieces = []
    each do |item|
      if item.kind_of?(String)
        pieces << item.inspect
      elsif item.kind_of?(MaRuKu::MDElement)
        pieces << item.inspect(compact)
      else
        raise "WTF #{item.class} #{item.inspect}"
      end
    end
    joined = pieces.join(join_string)
    return joined unless add_brackets
    "[#{joined}]"
  end
end
class Hash
  # Like #inspect, but the entries come out sorted by key name and are
  # joined by a bare comma, so the text does not depend on hash order.
  #
  # Every key is converted to a string for sorting and back to a symbol for
  # the lookup, so the values are only found for symbol keys.
  # Both arguments are optional and ignored.
  def inspect_ordered(a=nil,b=nil)
    names = keys.map { |key| key.to_s }.sort
    entries = names.map do |name|
      key = name.to_sym
      key.inspect + "=>" + self[key].inspect
    end
    "{" + entries.join(',') + "}"
  end
end
module MaRuKu
  class MDElement
    # Text form of the element.
    #
    # With +compact+ true, whatever inspect2 returns is used when it is not
    # nil/false. Otherwise (or when inspect2 gives nothing) the long form
    #   md_el(:node_type,children,meta,attribute_list)
    # is produced.
    def inspect(compact=true)
      short = compact ? inspect2 : nil
      return short if short

      fields = [
        node_type,
        children_inspect(compact),
        @meta_priv.inspect_ordered,
        al.inspect
      ]
      "md_el(:%s,%s,%s,%s)" % fields
    end

    # Text form of the children.
    #
    # "[]" when there are none; the children on a single line when that
    # line is shorter than 70 characters; otherwise one child per line,
    # passed through add_tabs and enclosed in brackets on their own lines.
    def children_inspect(compact=true)
      return "[]" if @children.empty?

      one_line = @children.inspect_more(compact,', ')
      return one_line if one_line.size < 70

      "[\n" +
        add_tabs(@children.inspect_more(compact,",\n",false)) +
        "\n]"
    end
  end
end

View file

@ -0,0 +1,61 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu
  class MDElement
    # Calls the block with every MDElement below this one, parents before
    # their own children. Only elements whose node_type equals
    # +e_node_type+ are passed on; when +e_node_type+ is nil, all of them
    # are. String children are skipped.
    def each_element(e_node_type=nil, &block)
      @children.each do |child|
        next unless child.kind_of? MDElement
        if !e_node_type || (e_node_type == child.node_type)
          block.call child
        end
        child.each_element(e_node_type, &block)
      end
    end

    # Replaces every String in the hierarchy by what the block returns for
    # it. The block may return one value, an array of values (all of them
    # are inserted at the place of the string) or nil (the string is
    # removed).
    def replace_each_string(&block)
      # Nested elements are processed first.
      @children.each do |child|
        child.replace_each_string(&block) if child.kind_of? MDElement
      end

      # The old array is emptied while the new list is being built.
      rebuilt = []
      until @children.empty?
        head = @children.shift
        if head.kind_of? String
          rebuilt.concat([*block.call(head)])
        else
          rebuilt << head
        end
      end
      @children = rebuilt
    end
  end
end

View file

@ -0,0 +1,82 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'maruku'
#require 'bluecloth'
# Benchmark driver: reads a document from standard input and measures, for
# each [class, method] pair below, the average time needed to parse it and
# the average time needed to render it.
data = $stdin.read

# Number of repetitions: the first command-line argument when it converts
# to a non-zero integer, otherwise 10.
requested = ARGV.size > 0 ? ARGV[0].to_i : 0
num = requested != 0 ? requested : 10

methods =
  [
    [Maruku, :to_html],
    # [BlueCloth, :to_html],
    [Maruku, :to_latex]
  ]
#methods = [[Maruku, :class]]
#num = 10

stats = []
methods.each do |c, method|
  puts "Computing for #{c}"

  # Parsing: build the document +num+ times.
  doc = nil
  parse_started = Time.now
  (1..num).each do |i|
    $stdout.write "#{i} "
    $stdout.flush
    doc = c.new(data)
  end
  parsing = (Time.now-parse_started)/num

  # Rendering: export the last parsed document +num+ times.
  render_started = Time.now
  (1..num).each do |i|
    $stdout.write "#{i} "
    $stdout.flush
    doc.send method
  end
  rendering = (Time.now-render_started)/num

  summary = "%s (%s): parsing %0.2f sec + rendering %0.2f sec "+
    "= %0.2f sec "
  puts(summary % [c, method, parsing,rendering,parsing+rendering])

  stats << [c, method, parsing, rendering]
end

puts "\n\n\n"

# Append the total to every row, then list the rows from slowest to fastest
# together with the ratio between the slowest total and the row's total.
stats.each { |row| row.push(row[2]+row[3]) }
max = stats.map { |row| row[4] }.max
stats.sort! { |x,y| x[4] <=> y[4] }
stats.reverse!

stats.each do |c, method, parsing, rendering, tot|
  line = "%20s: parsing %0.2f sec + rendering %0.2f sec "+
    "= %0.2f sec (%0.2fx)"
  puts(line % ["#{c} (#{method})", parsing,rendering,tot,max/tot])
end

View file

@ -0,0 +1,363 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'maruku'
require 'maruku/ext/math'
module MaRuKu; module Tests
# 5 accented letters in italian, encoded as UTF-8
AccIta8 = "\303\240\303\250\303\254\303\262\303\271"
# Same letters, written in ISO-8859-1 (one byte per letter)
AccIta1 = "\340\350\354\362\371"
# The word MA-RU-KU, written in katakana using UTF-8
Maruku8 = "\343\203\236\343\203\253\343\202\257"
# Runs the span-level parser over a table of cases and prints one status
# line per case through print_status.
#
# verbose::              stored in @verbose; print_status then also prints
#                        the detailed text of a failure
# break_on_first_error:: stop at the first problem: a wrong result ends the
#                        loop, an unexpected exception is raised again
# quiet::                do not print the cases that pass
#
# Each row of the table is [input, expected, comment]:
# * expected is either the array the parser must return or :throw when the
#   parser must raise; a row without it takes the value of the row before;
# * a row without comment takes the last given comment plus a counter;
# * a fourth entry, when present, is not passed to the parser by the loop
#   below.
# The cases returned by unit_tests_for_attribute_lists run before the table.
def test_span_parser(verbose, break_on_first_error, quiet)
  good_cases = [
    ["", [], 'Empty string gives empty list'],
    ["a", ["a"], 'Easy char'],
    [" a", ["a"], 'First space in the paragraph is ignored'],
    ["a\n \n", ["a"], 'Last spaces in the paragraphs are ignored'],
    [' ', [], 'One char => nothing'],
    [' ', [], 'Two chars => nothing'],
    ['a b', ['a b'], 'Spaces are compressed'],
    ['a b', ['a b'], 'Newlines are spaces'],
    ["a\nb", ['a b'], 'Newlines are spaces'],
    ["a\n b", ['a b'], 'Compress newlines 1'],
    ["a \nb", ['a b'], 'Compress newlines 2'],
    [" \nb", ['b'], 'Compress newlines 3'],
    ["\nb", ['b'], 'Compress newlines 4'],
    ["b\n", ['b'], 'Compress newlines 5'],
    ["\n", [], 'Compress newlines 6'],
    ["\n\n\n", [], 'Compress newlines 7'],
    [nil, :throw, "Should throw on nil input"],
    # Code blocks
    ["`" , :throw, 'Unclosed single ticks'],
    ["``" , :throw, 'Unclosed double ticks'],
    ["`a`" , [md_code('a')], 'Simple inline code'],
    ["`` ` ``" , [md_code('`')], ],
    ["`` \\` ``" , [md_code('\\`')], ],
    ["``a``" , [md_code('a')], ],
    ["`` a ``" , [md_code('a')], ],
    # Newlines
    ["a \n", ['a',md_el(:linebreak)], 'Two spaces give br.'],
    ["a \n", ['a'], 'Newlines 2'],
    [" \n", [md_el(:linebreak)], 'Newlines 3'],
    [" \n \n", [md_el(:linebreak),md_el(:linebreak)],'Newlines 3'],
    [" \na \n", [md_el(:linebreak),'a',md_el(:linebreak)],'Newlines 3'],
    # Inline HTML
    ["a < b", ['a < b'], '< can be on itself'],
    ["<hr>", [md_html('<hr />')], 'HR will be sanitized'],
    ["<hr/>", [md_html('<hr />')], 'Closed tag is ok'],
    ["<hr />", [md_html('<hr />')], 'Closed tag is ok 2'],
    ["<hr/>a", [md_html('<hr />'),'a'], 'Closed tag is ok 2'],
    ["<em></em>a", [md_html('<em></em>'),'a'], 'Inline HTML 1'],
    ["<em>e</em>a", [md_html('<em>e</em>'),'a'], 'Inline HTML 2'],
    ["a<em>e</em>b", ['a',md_html('<em>e</em>'),'b'], 'Inline HTML 3'],
    ["<em>e</em>a<em>f</em>",
      [md_html('<em>e</em>'),'a',md_html('<em>f</em>')],
      'Inline HTML 4'],
    ["<em>e</em><em>f</em>a",
      [md_html('<em>e</em>'),md_html('<em>f</em>'),'a'],
      'Inline HTML 5'],
    ["<img src='a' />", [md_html("<img src='a' />")], 'Attributes'],
    ["<img src='a'/>"],
    # emphasis
    ["**", :throw, 'Unclosed double **'],
    ["\\*", ['*'], 'Escaping of *'],
    ["a *b* ", ['a ', md_em('b')], 'Emphasis 1'],
    ["a *b*", ['a ', md_em('b')], 'Emphasis 2'],
    ["a * b", ['a * b'], 'Emphasis 3'],
    ["a * b*", :throw, 'Unclosed emphasis'],
    # same with underscore
    ["__", :throw, 'Unclosed double __'],
    ["\\_", ['_'], 'Escaping of _'],
    ["a _b_ ", ['a ', md_em('b')], 'Emphasis 4'],
    ["a _b_", ['a ', md_em('b')], 'Emphasis 5'],
    ["a _ b", ['a _ b'], 'Emphasis 6'],
    ["a _ b_", :throw, 'Unclosed emphasis'],
    ["_b_", [md_em('b')], 'Emphasis 7'],
    ["_b_ _c_", [md_em('b'),' ',md_em('c')], 'Emphasis 8'],
    ["_b__c_", [md_em('b'),md_em('c')], 'Emphasis 9'],
    # strong
    ["**a*", :throw, 'Unclosed double ** 2'],
    ["\\**a*", ['*', md_em('a')], 'Escaping of *'],
    ["a **b** ", ['a ', md_strong('b')], 'Emphasis 1'],
    ["a **b**", ['a ', md_strong('b')], 'Emphasis 2'],
    ["a ** b", ['a ** b'], 'Emphasis 3'],
    ["a ** b**", :throw, 'Unclosed emphasis'],
    ["**b****c**", [md_strong('b'),md_strong('c')], 'Emphasis 9'],
    # strong (with underscore)
    ["__a_", :throw, 'Unclosed double __ 2'],
    ["\\__a_", ['_', md_em('a')], 'Escaping of _'],
    ["a __b__ ", ['a ', md_strong('b')], 'Emphasis 1'],
    ["a __b__", ['a ', md_strong('b')], 'Emphasis 2'],
    ["a __ b", ['a __ b'], 'Emphasis 3'],
    ["a __ b__", :throw, 'Unclosed emphasis'],
    ["__b____c__", [md_strong('b'),md_strong('c')], 'Emphasis 9'],
    # extra strong
    ["***a**", :throw, 'Unclosed triple *** '],
    ["\\***a**", ['*', md_strong('a')], 'Escaping of *'],
    ["a ***b*** ", ['a ', md_emstrong('b')], 'Strong elements'],
    ["a ***b***", ['a ', md_emstrong('b')]],
    ["a *** b", ['a *** b']],
    ["a ** * b", ['a ** * b']],
    ["***b******c***", [md_emstrong('b'),md_emstrong('c')]],
    ["a *** b***", :throw, 'Unclosed emphasis'],
    # same with underscores
    ["___a__", :throw, 'Unclosed triple *** '],
    ["\\___a__", ['_', md_strong('a')], 'Escaping of *'],
    ["a ___b___ ", ['a ', md_emstrong('b')], 'Strong elements'],
    ["a ___b___", ['a ', md_emstrong('b')]],
    ["a ___ b", ['a ___ b']],
    ["a __ _ b", ['a __ _ b']],
    ["___b______c___", [md_emstrong('b'),md_emstrong('c')]],
    ["a ___ b___", :throw, 'Unclosed emphasis'],
    # mixing is bad
    ["*a_", :throw, 'Mixing is bad'],
    ["_a*", :throw],
    ["**a__", :throw],
    ["__a**", :throw],
    ["___a***", :throw],
    ["***a___", :throw],
    # links of the form [text][ref]
    ["\\[a]", ["[a]"], 'Escaping 1'],
    ["\\[a\\]", ["[a]"], 'Escaping 2'],
    # This is valid in the new Markdown version
    # ["[a]", ["a"], 'Not a link'],
    ["[a]", [ md_link(["a"],'a')], 'Empty link'],
    ["[a][]", ],
    ["[a][]b", [ md_link(["a"],'a'),'b'], 'Empty link'],
    ["[a\\]][]", [ md_link(["a]"],'a]')], 'Escape inside link'],
    ["[a", :throw, 'Link not closed'],
    ["[a][", :throw, 'Ref not closed'],
    # links of the form [text](url)
    ["\\[a](b)", ["[a](b)"], 'Links'],
    ["[a](url)c", [md_im_link(['a'],'url'),'c'], 'url'],
    ["[a]( url )c" ],
    ["[a] ( url )c" ],
    ["[a] ( url)c" ],
    ["[a](ur:/l/ 'Title')", [md_im_link(['a'],'ur:/l/','Title')],
      'url and title'],
    ["[a] ( ur:/l/ \"Title\")" ],
    ["[a] ( ur:/l/ \"Title\")" ],
    ["[a]( ur:/l/ Title)", :throw, "Must quote title" ],
    ["[a](url 'Tit\\\"l\\\\e')", [md_im_link(['a'],'url','Tit"l\\e')],
      'url and title escaped'],
    ["[a] ( url \"Tit\\\"l\\\\e\")" ],
    ["[a] ( url \"Tit\\\"l\\\\e\" )" ],
    ['[a] ( url "Tit\\"l\\\\e" )' ],
    ["[a]()", [md_im_link(['a'],'')], 'No URL is OK'],
    ["[a](\"Title\")", :throw, "No url specified" ],
    ["[a](url \"Title)", :throw, "Unclosed quotes" ],
    ["[a](url \"Title\\\")", :throw],
    ["[a](url \"Title\" ", :throw],
    ["[a](url \'Title\")", :throw, "Mixing is bad" ],
    ["[a](url \"Title\')"],
    ["[a](/url)", [md_im_link(['a'],'/url')], 'Funny chars in url'],
    ["[a](#url)", [md_im_link(['a'],'#url')]],
    ["[a](</script?foo=1&bar=2>)", [md_im_link(['a'],'/script?foo=1&bar=2')]],
    # Images
    ["\\![a](url)", ['!', md_im_link(['a'],'url') ], 'Escaping images'],
    ["![a](url)", [md_im_image(['a'],'url')], 'Image no title'],
    ["![a]( url )" ],
    ["![a] ( url )" ],
    ["![a] ( url)" ],
    ["![a](url 'ti\"tle')", [md_im_image(['a'],'url','ti"tle')], 'Image with title'],
    ['![a]( url "ti\\"tle")' ],
    ["![a](url", :throw, 'Invalid images'],
    ["![a( url )" ],
    ["![a] ('url )" ],
    ["![a][imref]", [md_image(['a'],'imref')], 'Image with ref'],
    ["![a][ imref]"],
    ["![a][ imref ]"],
    ["![a][\timref\t]"],
    ['<http://example.com/?foo=1&bar=2>',
      [md_url('http://example.com/?foo=1&bar=2')], 'Immediate link'],
    ['a<http://example.com/?foo=1&bar=2>b',
      ['a',md_url('http://example.com/?foo=1&bar=2'),'b'] ],
    ['<andrea@censi.org>',
      [md_email('andrea@censi.org')], 'Email address'],
    ['<mailto:andrea@censi.org>'],
    ["Developmen <http://rubyforge.org/projects/maruku/>",
      ["Developmen ", md_url("http://rubyforge.org/projects/maruku/")]],
    ["a<!-- -->b", ['a',md_html('<!-- -->'),'b'],
      'HTML Comment'],
    ["a<!--", :throw, 'Bad HTML Comment'],
    ["a<!-- ", :throw, 'Bad HTML Comment'],
    ["<?xml <?!--!`3 ?>", [md_xml_instr('xml','<?!--!`3')], 'XML processing instruction'],
    ["<? <?!--!`3 ?>", [md_xml_instr('','<?!--!`3')] ],
    ["<? ", :throw, 'Bad Server directive'],
    ["a <b", :throw, 'Bad HTML 1'],
    ["<b", :throw, 'Bad HTML 2'],
    ["<b!", :throw, 'Bad HTML 3'],
    ['`<div>`, `<table>`, `<pre>`, `<p>`',
      [md_code('<div>'),', ',md_code('<table>'),', ',
      md_code('<pre>'),', ',md_code('<p>')],
      'Multiple HTLM tags'],
    ["&andrea", ["&andrea"], 'Parsing of entities'],
    # no escaping is allowed
    # ["\\&andrea;", ["&andrea;"]],
    ["l&andrea;", ["l", md_entity('andrea')] ],
    ["&&andrea;", ["&", md_entity('andrea')] ],
    ["&123;;&amp;",[md_entity('123'),';',md_entity('amp')]],
    ["a\nThe [syntax page] [s] provides",
      ['a The ', md_link(['syntax page'],'s'), ' provides'], 'Regression'],
    ['![a](url "ti"tle")', [md_im_image(['a'],'url','ti"tle')],
      "Image with quotes"],
    ['![a](url \'ti"tle\')' ],
    ['[bar](/url/ "Title with "quotes" inside")',
      [md_im_link(["bar"],'/url/', 'Title with "quotes" inside')],
      "Link with quotes"],
    # We dropped this idea
    # ['$20,000 and $30,000', ['$20,000 and $30,000'], 'Math: spaces'],
    ['$20,000$', [md_inline_math('20,000')]],
    # ['$ 20,000$', ['$ 20,000$']],
    # ['$20,000 $ $20,000$', ['$20,000 $ ', md_inline_math('20,000')]],
    ["#{Maruku8}", [Maruku8], "Reading UTF-8"],
    ["#{AccIta1}", [AccIta8], "Converting ISO-8859-1 to UTF-8",
      {:encoding => 'iso-8859-1'}],
  ]
  good_cases = unit_tests_for_attribute_lists + good_cases

  # Fill in the entries that were left out: a missing expected value is
  # copied from the previous row, a missing comment becomes the last given
  # comment followed by a running number.
  count = 1; last_comment=""; last_expected=:throw
  good_cases.each do |t|
    if not t[1]
      t[1] = last_expected
    else
      last_expected = t[1]
    end
    if not t[2]
      t[2] = last_comment + " #{count+=1}"
    else
      last_comment = t[2]; count=1
    end
  end

  @verbose = verbose
  m = Maruku.new
  m.attributes[:on_error] = :raise
  Globals[:debug_keep_ials] = true

  good_cases.each do |input, expected, comment|
    output = nil
    begin
      output = m.parse_span_better(input)
      #lines = Maruku.split_lines input
      #output = m.parse_lines_as_span(lines)
    rescue Exception => e
      if not expected == :throw
        ex = e.inspect+ "\n"+ e.backtrace.join("\n")
        s = comment+describe_difference(input, expected, output)
        print_status(comment,'CRASHED :-(', ex+s)
        # Use the method argument here: no instance variable of this name
        # is ever assigned, so testing one would never stop the run.
        raise e if break_on_first_error
      else
        quiet || print_status(comment,'OK')
      end
    end

    if not expected == :throw
      if not (expected == output)
        s = comment+describe_difference(input, expected, output)
        print_status(comment, 'FAILED', s)
        break if break_on_first_error
      else
        quiet || print_status(comment, 'OK')
      end
    else # I expected a raise
      if output
        s = comment+describe_difference(input, expected, output)
        print_status(comment, 'FAILED (no throw)', s)
        break if break_on_first_error
      end
    end
  end
end
# Width to which the comment column of a status line is padded.
PAD=40

# Prints "- <comment> <status>", the comment being padded with spaces up to
# PAD characters (longer comments are printed as they are).
# When @verbose is set and +verbose_text+ is given, that text is printed
# after the status line.
def print_status(comment, status, verbose_text=nil)
  padded = comment.ljust(PAD)
  puts "- #{padded} #{status}"
  puts verbose_text if @verbose and verbose_text
end
# Builds the detailed text of a failure: the input, the expected value and
# the value actually obtained, each shown with #inspect under its own
# heading.
def describe_difference(input, expected, output)
  report = ""
  report << "\nInput:\n #{input.inspect}"
  report << "\nExpected:\n #{expected.inspect}"
  report << "\nOutput:\n #{output.inspect}\n"
  report
end
end end
# Make the test methods available on Maruku instances.
class Maruku
  include MaRuKu::Tests
end

# Command-line flags: 'v' = verbose, 'b' = stop at the first error,
# 'q' = do not print the cases that pass.
verbose, break_on_first, quiet = ['v', 'b', 'q'].map do |flag|
  ARGV.include? flag
end

Maruku.new.test_span_parser(verbose, break_on_first, quiet)

View file

@ -0,0 +1,136 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'maruku'
class Maruku
  # Reports a failed meta-data test by raising a RuntimeError.
  #
  # test:: the source text of the test
  # doc::  the document parsed from it
  # s::    short description of what went wrong
  #
  # The message contains the description, the source text, the inspected
  # document and the HTML rendering of the document.
  def Maruku.failed(test, doc, s)
    raise "Test failed: #{s}\n*****\n#{test}\n*****\n"+
      "#{doc.inspect}\n*****\n#{doc.to_html}"
  end

  # Parses every document of MetaTests (they are separated by '***') and
  # checks that it consists of exactly one child and that this child is a
  # header. The first failed check raises through Maruku.failed.
  #
  # Only the structure is checked; the id, class and style given to the
  # header are not compared with anything.
  def Maruku.metaTests
    MetaTests.split('***').each do |test|
      #puts "Test: #{test.inspect}"
      doc = Maruku.new(test)
      failed(test, doc, "children != 1") unless doc.children.size == 1
      h = doc.children[0]
      failed(test, doc, "child not header") unless h.node_type==:header
      # puts doc.inspect
      # puts doc.to_html
    end
  end

  # Different spellings of one header carrying an id, two classes and a
  # style, separated by '***'.
  MetaTests = <<EOF
# Head # {ref1 ref2 ref3}
{ref1}: id: id1; class: class1
{ref2}: class: class2
{ref3}: style: "Style is : important = for all } things"
***
# Head # {ref1 ref3 ref2}
{ref1}: id: id1; class: class1
{ref2}: class: class2
{ref3}: style: "Style is : important = for all } things"
***
# Head # {ref1 ref2 ref3}
{ref1}: id= id1; class=class1
{ref2}: class=class2
{ref3}: style="Style is : important = for all } things"
***
# Head # {ref1 ref2 ref3}
{ref1}: id=id1 class=class1
{ref2}: class=class2
{ref3}: style="Style is : important = for all } things"
***
# Head # {ref1 ref2 ref3}
{ref1}: id:id1 class:class1
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"
***
# Head # {ref1 ref2 ref3}
{ref1}: id:id1 class:class1
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"
***
# Head # {#id1 .class1 ref2 ref3}
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"
***
# Head # { #id1 .class1 ref2 ref3 }
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"
***
# Head # { id=id1 class=class1 ref2 ref3 }
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"
***
# Head # { id:id1 class="class1" class:"class2" style="Style is : important = for all } things"}
EOF
end
# Run the meta-data tests only when this file is the program being run.
Maruku.metaTests if File.basename($0) == 'tests.rb'

199
vendor/plugins/maruku/lib/maruku/toc.rb vendored Normal file
View file

@ -0,0 +1,199 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu
class MDDocument
  # The table of contents of the document: an instance of Section
  # (see below).
  attr_reader :toc
  attr_writer :toc
end
# One section of the table of contents.
class Section
  # section_level::      a Fixnum, equal to header_element.level
  # section_number::     array of Fixnum, like [1,2,5] for Section 1.2.5
  # header_element::     the header that opens this section
  # immediate_children:: array with the immediate children of this element
  # section_children::   array with the Sections inside this section
  attr_accessor :section_level, :section_number, :header_element,
                :immediate_children, :section_children

  # A new section has no children; level, number and header are not set.
  def initialize
    @immediate_children, @section_children = [], []
  end
end
class Section
  # Text dump of this section followed by all the sections inside it.
  # A section with a header gives a line with +indent+ underscores, its
  # level, its number, the text of the header and the header id; a section
  # without header gives the line "Master".
  def inspect(indent=1)
    heading =
      if @header_element
        "\_"*indent + "(#{@section_level})>\t #{@section_number.join('.')} : " +
          (@header_element.children_to_s +
           " (id: '#{@header_element.attributes[:id]}')\n")
      else
        "Master\n"
      end
    @section_children.inject(heading) do |text, child|
      text + child.inspect(indent+1)
    end
  end

  # Numbers this section and the sections inside it.
  # This section gets +a+; its n-th subsection gets +a+ followed by n.
  # The number is also stored in the :section_number attribute of the
  # header, when there is one.
  def numerate(a=[])
    self.section_number = a
    section_children.each_with_index do |child, index|
      numbering = a.clone
      numbering.push(index+1)
      child.numerate(numbering)
    end
    header = self.header_element
    header.attributes[:section_number] = self.section_number if header
  end

  include REXML

  # Creates an HTML toc: a div of class 'maruku_toc' around the list built
  # by create_toc.
  # Call this on the root
  def to_html
    wrapper = Element.new 'div'
    wrapper.attributes['class'] = 'maruku_toc'
    wrapper << create_toc
    wrapper
  end

  # Builds the list (ul) of the sections inside this one. Each item holds
  # the rendered section number, when there is one, and a link to the id of
  # the header; the sections of a subsection go in a nested list.
  def create_toc
    list = Element.new 'ul'
    # let's remove the bullets
    list.attributes['style'] = 'list-style: none;'
    @section_children.each do |section|
      header = section.header_element
      item = Element.new 'li'
      number = header.render_section_number
      item << number if number
      link = header.wrap_as_element('a')
      # The copy of the header must not repeat the id it links to.
      link.delete_attribute 'id'
      link.attributes['href'] = "##{header.attributes[:id]}"
      item << link
      item << section.create_toc if section.section_children.size>0
      list << item
    end
    list
  end

  # Creates a latex toc.
  # Call this on the root
  def to_latex
    to_latex_rec + "\n\n"
  end

  # One LaTeX line for every section inside this one, each followed by the
  # lines of its own subsections.
  def to_latex_rec
    entries = @section_children.map do |section|
      header = section.header_element
      line = "\\noindent"
      number = header.section_number
      line += number if number
      text = header.children_to_latex
      id = header.attributes[:id]
      line += "\\hyperlink{#{id}}{#{text}}"
      line += "\\dotfill \\pageref*{#{id}} \\linebreak\n"
      line += section.to_latex_rec if section.section_children.size>0
      line
    end
    entries.join
  end
end
class MDDocument
# Builds the tree of Sections for this document from its top-level
# children and returns the root Section. The method itself does not store
# the result in #toc.
#
# Every header found by each_element(:header) that has no :id attribute
# first receives one from generate_id. Then the top-level children are
# scanned in order, with a stack holding the chain of currently open
# sections (the bottom entry is an artificial root of level 0):
# * children that do not start or close a section are appended to the
#   immediate_children of the innermost open section; this includes a
#   header that is more than one level deeper than that section;
# * a header exactly one level deeper opens a new Section inside it;
# * a header of the same or of a shallower level closes the innermost
#   section and is then examined again against the section below it.
#
# When the root ends up with exactly one section, that section is returned
# in place of the root. The returned section has been numbered (numerate).
def create_toc
each_element(:header) do |h|
h.attributes[:id] ||= h.generate_id
end
stack = []
# the ancestor section
s = Section.new
s.section_level = 0
stack.push s
i = 0;
while i < @children.size
# Consume children until a header is met that belongs to the current
# level, to the level just below it, or to a level above.
while i < @children.size
if @children[i].node_type == :header
level = @children[i].level
break if level <= stack.last.section_level+1
end
stack.last.immediate_children.push @children[i]
i += 1
end
break if i>=@children.size
header = @children[i]
level = header.level
if level > stack.last.section_level
# this level is inside
s2 = Section.new
s2.section_level = level
s2.header_element = header
# the header keeps a reference to its section in @section
header.instance_variable_set :@section, s2
stack.last.section_children.push s2
stack.push s2
i+=1
elsif level == stack.last.section_level
# this level is a sibling
# (i is not advanced: the same header is examined again)
stack.pop
else
# this level is a parent
# (i is not advanced here either)
stack.pop
end
end
# If there is only one big header, then assume
# it is the master
if s.section_children.size == 1
s = s.section_children.first
end
# Assign section numbers
s.numerate
s
end
end
end

View file

@ -0,0 +1,33 @@
require 'maruku'
# Smoke test of the :on_error setting.
text = <<EOF
Chapter 1
=========
It was a stormy and rainy night.
EOF

# A document with a link that is never closed.
invalid = <<EOF
This is a [bad link.
EOF

# A valid document must simply render.
Maruku.new(text).to_html

# Errors are written to this string instead of the default stream.
s = ""

# With :on_error => :raise the invalid document must raise.
raised =
  begin
    Maruku.new(invalid, {:on_error => :raise, :error_stream => s})
    false
  rescue
    # puts "ok, got error"
    true
  end
puts "Error! It should have thrown an exception." unless raised

# With :on_error => :warning the same document must not raise.
raised =
  begin
    Maruku.new(invalid, {:on_error => :warning, :error_stream => s})
    false
  rescue
    true
  end
puts "Error! It should not have thrown an exception." if raised

View file

@ -0,0 +1,38 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu
  # Version of the library and address of its home page.
  Version = '0.5.2'
  MarukuURL = 'http://maruku.rubyforge.org/'

  # Whether the PHP Markdown extra syntax is used as well. Always true here.
  #
  # Note: a false value would not guarantee that no special feature is
  # used.
  def markdown_extra?; true; end

  # Always true here.
  def new_meta_data?; true; end
end