Bring up to current.

master
Jacques Distler 2007-01-22 08:36:51 -06:00
parent 69b62b6f33
commit b19e1e4f47
71 changed files with 8305 additions and 39 deletions

View File

@ -48,6 +48,7 @@ class ApplicationController < ActionController::Base
'.pdf' => 'application/pdf',
'.png' => 'image/png',
'.txt' => 'text/plain',
'.tex' => 'text/plain',
'.zip' => 'application/zip'
} unless defined? FILE_TYPES
@ -114,7 +115,7 @@ class ApplicationController < ActionController::Base
def rescue_action_in_public(exception)
render :status => 500, :text => <<-EOL
<html><body>
<html xmlns="http://www.w3.org/1999/xhtml"><body>
<h2>Internal Error</h2>
<p>An application error occurred while processing your request.</p>
<!-- \n#{exception}\n#{exception.backtrace.join("\n")}\n -->
@ -145,8 +146,10 @@ class ApplicationController < ActionController::Base
def set_content_type_header
if %w(rss_with_content rss_with_headlines).include?(action_name)
@response.headers['Content-Type'] = 'text/xml; charset=UTF-8'
elsif %w(tex).include?(action_name)
@response.headers['Content-Type'] = 'text/plain; charset=UTF-8'
else
@response.headers['Content-Type'] = 'text/html; charset=UTF-8'
@response.headers['Content-Type'] = 'application/xhtml+xml; charset=UTF-8'
end
end

View File

@ -9,7 +9,7 @@ class WikiController < ApplicationController
caches_action :show, :published, :authors, :recently_revised, :list
cache_sweeper :revision_sweeper
layout 'default', :except => [:rss_feed, :rss_with_content, :rss_with_headlines, :tex, :export_tex, :export_html]
layout 'default', :except => [:rss_feed, :rss_with_content, :rss_with_headlines, :tex, :pdf, :export_tex, :export_html]
def index
if @web_name
@ -280,8 +280,12 @@ class WikiController < ApplicationController
end
def tex
if @web.markup == :markdownMML
@tex_content = Maruku.new(@page.content).to_latex
else
@tex_content = RedClothForTex.new(@page.content).to_tex
end
end
protected
@ -305,8 +309,12 @@ class WikiController < ApplicationController
end
def export_page_to_tex(file_path)
tex
File.open(file_path, 'w') { |f| f.write(render_to_string(:template => 'wiki/tex', :layout => false)) }
if @web.markup == :markdownMML
@tex_content = Maruku.new(@page.content).to_latex
else
@tex_content = RedClothForTex.new(@page.content).to_tex
end
File.open(file_path, 'w') { |f| f.write(render_to_string(:template => 'wiki/tex', :layout => 'tex')) }
end
def export_pages_as_zip(file_type, &block)
@ -396,7 +404,11 @@ class WikiController < ApplicationController
def render_tex_web
@web.select.by_name.inject({}) do |tex_web, page|
if @web.markup == :markdownMML
tex_web[page.name] = Maruku.new(page.content).to_latex
else
tex_web[page.name] = RedClothForTex.new(page.content).to_tex
end
tex_web
end
end

View File

@ -23,7 +23,7 @@ module ApplicationHelper
if element.last != selected
options << "<option value=\"#{element.last}\">#{element.first}</option>"
else
options << "<option value=\"#{element.last}\" selected>#{element.first}</option>"
options << "<option value=\"#{element.last}\" selected=\"selected\">#{element.first}</option>"
end
else
options << ((element != selected) ? "<option>#{element}</option>" : "<option selected>#{element}</option>")

View File

@ -7,7 +7,7 @@
</p>
<%= form_tag({ :controller => 'admin', :action => 'create_system' },
{ 'id' => 'setup', 'method' => 'post', 'onSubmit' => 'return validateSetup()',
{ 'id' => 'setup', 'method' => 'post', 'onsubmit' => 'return validateSetup()',
'accept-charset' => 'utf-8' })
%>
<ol class="setup">
@ -22,9 +22,9 @@
</div>
<div class="inputBox">
Name: <input type="text" id="web_name" name="web_name" value="Wiki"
onChange="proposeAddress();" onClick="this.value == 'Wiki' ? this.value = '' : true" />
onchange="proposeAddress();" onclick="this.value == 'Wiki' ? this.value = '' : true" />
&nbsp;&nbsp;
Address: <input type="text" id="web_address" name="web_address" onChange="cleanAddress();"
Address: <input type="text" id="web_address" name="web_address" onchange="cleanAddress();"
value="wiki" />
</div>
</li>

View File

@ -7,7 +7,7 @@
<%= form_tag({ :controller => 'admin', :action => 'create_web' },
{ 'id' => 'setup', 'method' => 'post',
'onSubmit' => 'cleanAddress(); return validateSetup()',
'onsubmit' => 'cleanAddress(); return validateSetup()',
'accept-charset' => 'utf-8' })
%>
@ -21,9 +21,9 @@
The address can only consist of letters and digits.
</div>
<div class="inputBox">
Name: <input type="text" id="web_name" name="name" onChange="proposeAddress();" />
Name: <input type="text" id="web_name" name="name" onchange="proposeAddress();" />
&nbsp;&nbsp;
Address: <input type="text" id="web_address" name="address" onChange="cleanAddress();" />
Address: <input type="text" id="web_address" name="address" onchange="cleanAddress();" />
</div>
</li>
</ol>

View File

@ -2,7 +2,7 @@
<%= form_tag({ :controller => 'admin', :action => 'edit_web', :web => @web.address },
{ 'id' => 'setup', 'method' => 'post',
'onSubmit' => 'cleanAddress(); return validateSetup()',
'onsubmit' => 'cleanAddress(); return validateSetup()',
'accept-charset' => 'utf-8' })
%>
@ -15,9 +15,9 @@
<div class="inputBox">
Name: <input type="text" id="name" name="name" class="disableAutoComplete" value="<%= @web.name %>"
onChange="proposeAddress();" /> &nbsp;&nbsp;
onchange="proposeAddress();" /> &nbsp;&nbsp;
Address: <input type="text" class="disableAutoComplete" id="address" name="address" value="<%= @web.address %>"
onChange="cleanAddress();" />
onchange="cleanAddress();" />
<small><em>(Letters and digits only)</em></small>
</div>
@ -25,7 +25,7 @@
<div class="inputBox">
Markup:
<select name="markup">
<%= html_options({'Textile' => :textile, 'Markdown' => :markdown, 'Mixed' => :mixed,
<%= html_options({'Textile' => :textile, 'Markdown' => :markdown, 'Markdown+itex2MML' => :markdownMML, 'Mixed' => :mixed,
'RDoc' => :rdoc }, @web.markup) %>
</select>
@ -64,7 +64,7 @@
</p>
<a href="#"
onClick="document.getElementById('additionalStyle').style.display='block';return false;">
onclick="document.getElementById('additionalStyle').style.display='block';return false;">
Stylesheet tweaks &gt;&gt;</a>
<small><em>
- add or change styles used by this web; styles defined here take precedence over
@ -120,7 +120,7 @@
<%= form_tag({:controller => 'admin', :web => @web.address, :action => 'remove_orphaned_pages'},
{ :id => 'remove_orphaned_pages',
:onSubmit => "return checkSystemPassword(document.getElementById('system_password_orphaned').value)",
:onsubmit => "return checkSystemPassword(document.getElementById('system_password_orphaned').value)",
'accept-charset' => 'utf-8' })
%>
<p align="right">

View File

@ -13,7 +13,7 @@
<p>
<input type="submit" value="Update" /> as
<input type="text" name="author" id="authorName" value="<%= @author %>"
onClick="this.value == 'AnonymousCoward' ? this.value = '' : true" />
onclick="this.value == 'AnonymousCoward' ? this.value = '' : true" />
<% if @page %>
| <%= link_to 'Cancel', :web => @web.address, :action => 'file'%> <small>(unlocks page)</small>
<% end %>

View File

@ -1,6 +1,4 @@
<!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd" >
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>

View File

@ -0,0 +1 @@
<%= @content_for_layout %>

View File

@ -0,0 +1,13 @@
<h3>Markdown + itex2MML formatting tips (<a target="_new" href="http://daringfireball.net/projects/markdown/syntax">advanced</a>)</h3>
<table cellspacing="0" cellpadding="0">
<tr><td>_your text_</td><td class="arrow">&rarr;</td><td><em>your text</em></td></tr>
<tr><td>**your text**</td><td class="arrow">&rarr;</td><td><strong>your text</strong></td></tr>
<tr><td>`my code`</td><td class="arrow">&rarr;</td><td><code>my code</code></td></tr>
<tr><td>* Bulleted list<br />* Second item</td><td class="arrow">&rarr;</td><td>&#8226; Bulleted list<br />&#8226; Second item</td></tr>
<tr><td>1. Numbered list<br />1. Second item</td><td class="arrow">&rarr;</td><td>1. Numbered list<br />2. Second item</td></tr>
<tr><td>[link name](URL)</td><td class="arrow">&rarr;</td><td><a href="URL">link name</a></td></tr>
<tr><td>***</td><td class="arrow">&rarr;</td><td>Horizontal ruler</td></tr>
<tr><td>&lt;http://url><br />&lt;email@add.com></td><td class="arrow">&rarr;</td><td>Auto-linked</td></tr>
<tr><td>![Alt text](URL)</td><td class="arrow">&rarr;</td><td>Image</td></tr>
</table>
<p>For a complete list of LaTeX commands supported here, see the <a href="http://golem.ph.utexas.edu/~distler/blog/itex2MMLcommands.html">itex2MML Commands Summary</a>.</p>

View File

@ -1,4 +1,4 @@
<h3>Textile formatting tips (<a href="http://hobix.com/textile/quick.html" onClick="quickRedReference(); return false;">advanced</a>)</h3>
<h3>Textile formatting tips (<a href="http://hobix.com/textile/quick.html" onclick="quickRedReference(); return false;">advanced</a>)</h3>
<table cellspacing="0" cellpadding="0">
<tr><td>_your text_</td><td class="arrow">&rarr;</td><td><em>your text</em></td></tr>
<tr><td>*your text*</td><td class="arrow">&rarr;</td><td><strong>your text</strong></td></tr>

View File

@ -11,7 +11,7 @@
<div id="editForm">
<%= form_tag({ :action => 'save', :web => @web.address, :id => @page.name },
{ 'id' => 'editForm', 'method' => 'post', 'onSubmit' => 'cleanAuthorName()',
{ 'id' => 'editForm', 'method' => 'post', 'onsubmit' => 'cleanAuthorName()',
'accept-charset' => 'utf-8' }) %>
<textarea name="content" id="content"><%= h(@flash[:content] || @page.content) %></textarea>

View File

@ -11,7 +11,7 @@
<div id="editForm">
<%= form_tag({ :action => 'save', :web => @web.address, :id => @page_name },
{ 'id' => 'editForm', 'method' => 'post', 'onSubmit' => 'cleanAuthorName();', 'accept-charset' => 'utf-8' }) %>
{ 'id' => 'editForm', 'method' => 'post', 'onsubmit' => 'cleanAuthorName();', 'accept-charset' => 'utf-8' }) %>
<textarea name="content" id="content"><%= h(@flash[:content] || '') %></textarea>
<div id="editFormButtons">

View File

@ -37,7 +37,7 @@
<%= link_to('Print',
{ :web => @web.address, :action => 'print', :id => @page.name },
{ :accesskey => 'p', :name => 'view_print' }) %>
<% if defined? RedClothForTex and RedClothForTex.available? and @web.markup == :textile %>
<% if defined? RedClothForTex and RedClothForTex.available? and @web.markup == :textile or @web.markup == :markdownMML %>
|
<%= link_to 'TeX', {:web => @web.address, :action => 'tex', :id => @page.name},
{:name => 'view_tex'} %>

View File

@ -13,13 +13,13 @@
<div id="editForm">
<%= form_tag({:web => @web.address, :action => 'save', :id => @page.name},
{ :id => 'editForm', :method => 'post', :onSubmit => 'cleanAuthorName();',
{ :id => 'editForm', :method => 'post', :onsubmit => 'cleanAuthorName();',
'accept-charset' => 'utf-8' }) %>
<textarea name="content" id="content"><%= @revision.content %></textarea>
<div id="editFormButtons">
<input type="submit" value="Update" accesskey="u" /> as
<input type="text" name="author" id="authorName" value="<%= @author %>"
onClick="this.value == 'AnonymousCoward' ? this.value = '' : true" />
onclick="this.value == 'AnonymousCoward' ? this.value = '' : true" />
|
<span>
<%= link_to('Cancel', {:web => @web.address, :action => 'cancel_edit', :id => @page.name},

View File

@ -1,12 +1,12 @@
\documentclass[12pt,titlepage]{article}
\usepackage[danish]{babel} %danske tekster
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage[OT1]{fontenc} %rigtige danske bogstaver...
\usepackage{a4}
\usepackage{graphicx}
\usepackage{ucs}
\usepackage[utf8x]{inputenc}
\input epsf
\usepackage{hyperref}
%-------------------------------------------------------------------

BIN
db/production.db.sqlite3 Normal file

Binary file not shown.

78
db/schema.rb Normal file
View File

@ -0,0 +1,78 @@
# This file is autogenerated. Instead of editing this file, please use the
# migrations feature of ActiveRecord to incrementally modify your database, and
# then regenerate this schema definition.
# Schema snapshot at version 2. Every create_table below passes :force => true.
# NOTE(review): this file is autogenerated (see header); comments added here
# will be lost the next time the schema is regenerated.
ActiveRecord::Schema.define(:version => 2) do
# Wiki pages: name, timestamps and lock bookkeeping (locked_by / locked_at).
# web_id presumably references webs.id -- no foreign key is declared here.
create_table "pages", :force => true do |t|
t.column "created_at", :datetime, :null => false
t.column "updated_at", :datetime, :null => false
t.column "web_id", :integer, :default => 0, :null => false
t.column "locked_by", :string, :limit => 60
t.column "name", :string, :limit => 60
t.column "locked_at", :datetime
end
# Revisions: text content plus author, ip and revision time.
# page_id presumably references pages.id -- TODO confirm against the models.
create_table "revisions", :force => true do |t|
t.column "created_at", :datetime, :null => false
t.column "updated_at", :datetime, :null => false
t.column "revised_at", :datetime, :null => false
t.column "page_id", :integer, :default => 0, :null => false
t.column "content", :text, :default => "", :null => false
t.column "author", :string, :limit => 60
t.column "ip", :string, :limit => 60
end
# Revisions are indexed by author, creation time and page.
add_index "revisions", ["author"], :name => "revisions_author_index"
add_index "revisions", ["created_at"], :name => "revisions_created_at_index"
add_index "revisions", ["page_id"], :name => "revisions_page_id_index"
# Session storage: session_id, data blob and last update time.
create_table "sessions", :force => true do |t|
t.column "session_id", :string
t.column "data", :text
t.column "updated_at", :datetime
end
add_index "sessions", ["session_id"], :name => "sessions_session_id_index"
# "system" table: holds only a password column.
create_table "system", :force => true do |t|
t.column "password", :string, :limit => 60
end
# Webs: per-web settings (name, address, password, markup, color, upload
# limits and several integer flags). markup defaults to "textile".
create_table "webs", :force => true do |t|
t.column "created_at", :datetime, :null => false
t.column "updated_at", :datetime, :null => false
t.column "name", :string, :limit => 60, :default => "", :null => false
t.column "address", :string, :limit => 60, :default => "", :null => false
t.column "password", :string, :limit => 60
t.column "additional_style", :string
t.column "allow_uploads", :integer, :default => 1
t.column "published", :integer, :default => 0
t.column "count_pages", :integer, :default => 0
t.column "markup", :string, :limit => 50, :default => "textile"
t.column "color", :string, :limit => 6, :default => "008B26"
t.column "max_upload_size", :integer, :default => 100
t.column "safe_mode", :integer, :default => 0
t.column "brackets_only", :integer, :default => 0
end
# Uploaded files: file name and description, tied to a web through web_id.
create_table "wiki_files", :force => true do |t|
t.column "created_at", :datetime, :null => false
t.column "updated_at", :datetime, :null => false
t.column "web_id", :integer, :null => false
t.column "file_name", :string, :null => false
t.column "description", :string, :null => false
end
# References from a page (page_id) to a name (referenced_name), with a
# one-character link_type code whose values are defined elsewhere.
create_table "wiki_references", :force => true do |t|
t.column "created_at", :datetime, :null => false
t.column "updated_at", :datetime, :null => false
t.column "page_id", :integer, :default => 0, :null => false
t.column "referenced_name", :string, :limit => 60, :default => "", :null => false
t.column "link_type", :string, :limit => 1, :default => "", :null => false
end
# References are indexed by referenced name and by page.
add_index "wiki_references", ["referenced_name"], :name => "wiki_references_referenced_name_index"
add_index "wiki_references", ["page_id"], :name => "wiki_references_page_id_index"
end

View File

@ -1,7 +1,6 @@
#!/bin/sh
#!/usr/bin/env ruby
cd $(dirname $0)
export LD_LIBRARY_PATH=./lib/native/linux-x86:$LD_LIBRARY_PATH
ruby script/server
# Executable file for a gem
# must be same as ./instiki.rb
load File.dirname(__FILE__) + '/script/server'

View File

@ -1112,7 +1112,7 @@ class BlueCloth < String
### Return a copy of +str+ with angle brackets and ampersands HTML-encoded.
def encode_html( str )
str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w{1,8});)/i, "&amp;" ).
str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w+);)/i, "&amp;" ).
gsub( %r{<(?![a-z/?\$!])}i, "&lt;" )
end

View File

@ -40,6 +40,14 @@ module Engines
end
end
class MarkdownMML < AbstractEngine
def mask
require_dependency 'maruku'
require_dependency 'maruku/ext/math'
Maruku.new(@content.delete("\r")).to_html
end
end
class Mixed < AbstractEngine
def mask
require_dependency 'redcloth'
@ -57,6 +65,6 @@ module Engines
end
end
MAP = { :textile => Textile, :markdown => Markdown, :mixed => Mixed, :rdoc => RDoc }
MAP = { :textile => Textile, :markdown => Markdown, :markdownMML => MarkdownMML, :mixed => Mixed, :rdoc => RDoc }
MAP.default = Textile
end

133
lib/maruku.rb Normal file
View File

@ -0,0 +1,133 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'rexml/document'
# :include:MaRuKu.txt
# Namespace skeleton for Maruku. The modules below are declared empty here;
# presumably the files required further down in this file reopen them to add
# the actual methods -- TODO confirm against those files.
module MaRuKu
# Input (parsing) side.
module In
module Markdown
module SpanLevelParser; end
module BlockLevelParser; end
end
# more to come?
end
# Output (export) side.
module Out
# Functions for exporting to MarkDown.
module Markdown; end
# Functions for exporting to HTML.
module HTML; end
# Functions for exporting to Latex
module Latex; end
end
# These are strings utilities.
module Strings; end
module Helpers; end
module Errors; end
# Base element class: mixes in REXML, the MaRuKu namespace itself, all three
# exporters and the Strings/Helpers/Errors utility modules.
# The order of the include statements determines method lookup order
# (later includes are searched first), so do not reorder them casually.
class MDElement
include REXML
include MaRuKu
include Out::Markdown
include Out::HTML
include Out::Latex
include Strings
include Helpers
include Errors
end
# A whole document: an MDElement that additionally mixes in the Markdown
# input modules (span-level and block-level parsers).
class MDDocument < MDElement
include In::Markdown
include In::Markdown::SpanLevelParser
include In::Markdown::BlockLevelParser
end
end
# This is the public interface
class Maruku < MaRuKu::MDDocument; end
require 'rexml/document'
# Structures definition
require 'maruku/structures'
require 'maruku/structures_inspect'
require 'maruku/defaults'
# Less typing
require 'maruku/helpers'
# Code for parsing whole Markdown documents
require 'maruku/input/parse_doc'
# Ugly things kept in a closet
require 'maruku/string_utils'
require 'maruku/input/linesource'
require 'maruku/input/type_detection'
# A class for reading and sanitizing inline HTML
require 'maruku/input/html_helper'
# Code for parsing Markdown block-level elements
require 'maruku/input/parse_block'
# Code for parsing Markdown span-level elements
require 'maruku/input/charsource'
require 'maruku/input/parse_span_better'
require 'maruku/input/rubypants'
require 'maruku/input/extensions'
require 'maruku/attributes'
require 'maruku/structures_iterators'
require 'maruku/errors_management'
# Code for creating a table of contents
require 'maruku/toc'
# Version and URL
require 'maruku/version'
# Exporting to html
require 'maruku/output/to_html'
# Exporting to latex
require 'maruku/output/to_latex'
require 'maruku/output/to_latex_strings'
require 'maruku/output/to_latex_entities'
# Pretty print
require 'maruku/output/to_markdown'
# Exporting to text: strips all formatting (not complete)
require 'maruku/output/to_s'
# class Maruku is the global interface
require 'maruku/maruku'

View File

@ -0,0 +1,462 @@
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
# NOTE: this is the old span-level regexp-based parser.
#
# The new parser is a real parser and is defined with functions in parse_span_better.rb
# The new parser is faster, handles syntax errors, but it's absolutely not readable.
#
# Also, regexp parsers simply CANNOT handle inline HTML properly.
# There are two black-magic methods `match_couple_of` and `map_match`,
# defined at the end of the file, that make the function
# `parse_lines_as_span` so elegant.
class Maruku
# Takes care of all span-level formatting, links, images, etc.
#
# Lines must not contain block-level elements.
#
# lines:: collection of strings; it is handed to +resolve_linebreaks+.
#
# Returns the children (Strings and MDElements) of a temporary :dummy span
# after all span-level constructs have been replaced by elements.
#
# Side effect: every image or link with an immediate url registers a
# "dummy_N" entry in @refs.
#
# Fixes relative to the previous revision:
# * an image written as ![Alt][] now falls back to the alt text for its id
#   (the code used an undefined local, +text+);
# * the title of [text](url "title") is read from capture group 2
#   (group 3 does not exist, so titles were always dropped);
# * a dead, immediately overwritten url regexp and an unused title regexp
#   were removed.
def parse_lines_as_span(lines)
  # first, get rid of linebreaks
  res = resolve_linebreaks(lines)

  span = MDElement.new(:dummy, res)

  # encode all escapes
  span.replace_each_string { |s| s.escape_md_special }

  # The order of processing is significant:
  # 1. inline code
  # 2. immediate links
  # 3. inline HTML
  # 4. everything else

  # search for ``code`` markers
  span.match_couple_of('``') { |children, match1, match2|
    e = create_md_element(:inline_code)
    # this is now opaque to processing
    e.meta[:raw_code] = children.join('').it_was_a_code_block
    e
  }

  # Search for `single tick` code markers
  span.match_couple_of('`') { |children, match1, match2|
    e = create_md_element(:inline_code)
    # this is now opaque to processing
    e.meta[:raw_code] = children.join('').it_was_a_code_block
    e
  }

  # Detect any immediate link: <http://www.google.com>
  # we expect an http: or something: at the beginning
  span.map_match( /<(\w+:[^\>]+)>/) { |match|
    url = match[1]
    e = create_md_element(:immediate_link, [])
    e.meta[:url] = url
    e
  }

  # Search for inline HTML (the support is pretty basic for now)

  # this searches for a matching block
  inlineHTML1 = %r{
    ( # put everything in 1
    < # open
    (\w+) # opening tag in 2
    > # close
    .* # anything
    </\2> # match closing tag
    )
  }x

  # this searches for only one block
  inlineHTML2 = %r{
    ( # put everything in 1
    < # open
    \w+ #
    # close
    [^<>]* # anything except
    /> # closing tag
    )
  }x

  [inlineHTML1, inlineHTML2].each do |reg|
    span.map_match(reg) { |match|
      raw_html = match[1]
      convert_raw_html_in_list(raw_html)
    }
  end

  # Detect footnotes references: [^1]
  span.map_match(/\[(\^[^\]]+)\]/) { |match|
    id = match[1].strip.downcase
    e = create_md_element(:footnote_reference)
    e.meta[:footnote_id] = id
    e
  }

  # Detect any image like ![Alt text][url]
  span.map_match(/\!\[([^\]]+)\]\s?\[([^\]]*)\]/) { |match|
    alt = match[1]
    id = match[2].strip.downcase

    # ![Alt text][] : an empty id means "use the alt text as the id"
    if id.size == 0
      id = alt.strip.downcase
    end

    e = create_md_element(:image)
    e.meta[:ref_id] = id
    e
  }

  # Detect any image with immediate url: ![Alt](url "title")
  # a dummy ref is created and put in the symbol table
  link1 = /!\[([^\]]+)\]\s?\(([^\s\)]*)(?:\s+["'](.*)["'])?\)/
  span.map_match(link1) { |match|
    alt = match[1]
    url = match[2]
    title = match[3]

    url = url.strip
    # create a dummy id
    id="dummy_#{@refs.size}"
    @refs[id] = {:url=>url, :title=>title}

    e = create_md_element(:image)
    e.meta[:ref_id] = id
    e
  }

  # an id reference: "[id]", "[ id ]"
  reg_id_ref = %r{
    \[ # opening bracket
    ([^\]]*) # 0 or more non-closing bracket (this is too permissive)
    \] # closing bracket
  }x

  # Matches a url; it contributes exactly one capture group.
  reg_url = %r{([^\s\]\)]+)}

  # [bah](http://www.google.com "Google.com"),
  # [bah](http://www.google.com),
  # [empty]()
  reg_url_and_title = %r{
    \( # opening
    \s* # whitespace
    #{reg_url}? # url = 1 might be empty
    (?:\s+["'](.*)["'])? # optional title = 2
    \s* # whitespace
    \) # closing
  }x

  # Detect a reference-style link like [text][id]
  span.map_match(/\[([^\]]+)\]\s?\[([^\]]*)\]/) { |match|
    text = match[1]
    id = match[2].strip.downcase

    if id.size == 0
      id = text.strip.downcase
    end

    children = parse_lines_as_span(text)
    e = create_md_element(:link, children)
    e.meta[:ref_id] = id
    e
  }

  # NOTE(review): this pattern still starts with `!`, i.e. it is the image
  # pattern already consumed above, while the body builds a :link. It looks
  # like it can never match at this point; kept unchanged pending
  # confirmation of the intended pattern ([text](url "title")?).
  # a dummy ref is created and put in the symbol table
  link1 = /!\[([^\]]+)\]\s?\(([^\s\)]*)(?:\s+["'](.*)["'])?\)/
  span.map_match(link1) { |match|
    text = match[1]
    children = parse_lines_as_span(text)

    url = match[2]
    title = match[3]

    url = url.strip
    # create a dummy id
    id="dummy_#{@refs.size}"
    @refs[id] = {:url=>url, :title=>title}
    @refs[id][:title] = title if title

    e = create_md_element(:link, children)
    e.meta[:ref_id] = id
    e
  }

  # Detect any link like [Google engine][google]
  span.match_couple_of('[', # opening bracket
    %r{\] # closing bracket
    [ ]? # optional whitespace
    #{reg_id_ref} # ref id, with $1 being the reference
    }x
  ) { |children, match1, match2|
    id = match2[1]
    id = id.strip.downcase

    if id.size == 0
      id = children.join.strip.downcase
    end

    e = create_md_element(:link, children)
    e.meta[:ref_id] = id
    e
  }

  # Detect any link with immediate url: [Google](http://www.google.com)
  # XXX Note that the url can be empty: [Empty]()
  # a dummy ref is created and put in the symbol table
  span.match_couple_of('[', # opening bracket
    %r{\] # closing bracket
    [ ]? # optional whitespace
    #{reg_url_and_title} # ref id, with $1 being the url and $2 being the title
    }x
  ) { |children, match1, match2|
    url = match2[1]
    # The closing pattern has two capture groups: 1 = url, 2 = title.
    title = match2[2]

    # create a dummy id
    id="dummy_#{@refs.size}"
    @refs[id] = {:url=>url}
    @refs[id][:title] = title if title

    e = create_md_element(:link, children)
    e.meta[:ref_id] = id
    e
  }

  # Detect an email address <andrea@invalid.it>
  span.map_match(EMailAddress) { |match|
    email = match[1]
    e = create_md_element(:email_address, [])
    e.meta[:email] = email
    e
  }

  # Detect HTML entities
  span.map_match(/&([\w\d]+);/) { |match|
    entity_name = match[1]

    e = create_md_element(:entity, [])
    e.meta[:entity_name] = entity_name
    e
  }

  # And now the easy stuff

  # search for ***strong and em***
  span.match_couple_of('***') { |children,m1,m2|
    create_md_element(:strong, [create_md_element(:emphasis, children)] ) }

  span.match_couple_of('___') { |children,m1,m2|
    create_md_element(:strong, [create_md_element(:emphasis, children)] ) }

  # search for **strong**
  span.match_couple_of('**') { |children,m1,m2| create_md_element(:strong, children) }

  # search for __strong__
  span.match_couple_of('__') { |children,m1,m2| create_md_element(:strong, children) }

  # search for *emphasis*
  span.match_couple_of('*') { |children,m1,m2| create_md_element(:emphasis, children) }

  # search for _emphasis_
  span.match_couple_of('_') { |children,m1,m2| create_md_element(:emphasis, children) }

  # finally, unescape the special characters
  span.replace_each_string { |s| s.unescape_md_special}

  span.children
end
# returns array containing Strings or :linebreak elements
# Joins the stripped lines with single spaces. Whenever force_linebreak?
# answers true for a line, the text gathered so far is emitted, followed by
# a :linebreak element, and gathering starts again from an empty string.
def resolve_linebreaks(lines)
  pieces = []
  buffer = ""

  lines.each do |line|
    separator = buffer.size > 0 ? " " : ""
    buffer = buffer + separator + line.strip

    next unless force_linebreak?(line)

    pieces << buffer
    pieces << create_md_element(:linebreak)
    buffer = ""
  end

  # leftover text after the last forced break (if any)
  pieces << buffer if buffer.size > 0
  pieces
end
# raw_html is something like
# <em> A</em> dopwkk *maruk* <em>A</em>
# Wraps +raw_html+ in a :raw_html element. The source text is always stored
# under meta[:raw_html]; meta[:parsed_html] is set only when Document.new
# accepts the text (Document is presumably REXML::Document -- TODO confirm).
# A parse failure is reported on $stderr and otherwise ignored.
def convert_raw_html_in_list(raw_html)
  html_element = create_md_element(:raw_html)
  html_element.meta[:raw_html] = raw_html

  begin
    parsed_document = Document.new(raw_html)
    html_element.meta[:parsed_html] = parsed_document
  rescue
    # best effort: keep the raw text even if it cannot be parsed
    $stderr.puts "convert_raw_html_in_list Malformed HTML:\n#{raw_html}"
  end

  html_element
end
end
# And now the black magic that makes the part above so elegant
class MDElement
  # Runs +regexp+ over every string handed out by `replace_each_string`.
  # For each match the string is cut into: the text before the match (kept
  # when non-empty), whatever the block returns for the match (a single
  # element or an array of elements), and the text after the match, which is
  # scanned again for further matches.
  #
  #   ..., matched_string, ... -> ..., pre_match, block.call(match), post_match
  def map_match(regexp, &block)
    replace_each_string do |remaining|
      pieces = []

      while (found = regexp.match(remaining))
        before = found.pre_match
        pieces << before if before && before.size > 0

        # the block result may be one element or an array of them
        pieces.concat([*block.call(found)])

        # continue with what follows the match
        remaining = found.post_match
      end

      pieces << remaining if remaining.size > 0
      pieces
    end
  end

  # Finds pairs of delimiters in the Strings/MDElements held in @children.
  #
  # +open+ and +close+ are either plain strings (matched literally) or
  # Regexps; +close+ defaults to +open+. Child MDElements are processed
  # first with the same delimiters. For every completed pair the block is
  # called with |contained children, opening match, closing match| and its
  # result replaces the delimited span. An opening delimiter without a
  # closing one is put back as plain text.
  #
  # Assigns the rebuilt list to @children and returns it.
  def match_couple_of(open, close=nil, &block)
    close ||= open
    opener = open.kind_of?(Regexp) ? open : Regexp.new(Regexp.escape(open))
    closer = close.kind_of?(Regexp) ? close : Regexp.new(Regexp.escape(close))

    # nested elements first
    @children.each do |child|
      child.match_couple_of(opener, closer, &block) if child.kind_of? MDElement
    end

    output = []
    until @children.empty?
      item = @children.shift
      opening = item.kind_of?(String) ? opener.match(item) : nil

      # not a string, or no opening delimiter in it: keep as is
      unless opening
        output << item
        next
      end

      # text before the opening delimiter is done
      before = opening.pre_match
      output.push before if before && before.size > 0

      # text after it goes back on the queue to be scanned for the closing
      after = opening.post_match
      @children.unshift after if after && after.size > 0

      contained = []
      closed = false
      until @children.empty? || closed
        item = @children.shift
        closing = item.kind_of?(String) ? closer.match(item) : nil

        unless closing
          contained << item
          next
        end

        closed = true

        # text before the closing delimiter belongs to the pair
        inner = closing.pre_match
        contained.push inner if inner && inner.size > 0

        # text after it is scanned again later
        rest = closing.post_match
        @children.unshift rest if rest && rest.size > 0

        output << block.call(contained, opening, closing)
      end

      next if closed

      # no closing delimiter: emit the opening text literally and requeue
      # everything that was collected, in its original order
      output << opening.to_s
      contained.reverse_each { |held| @children.unshift held }
    end

    raise "BugBug" unless @children.empty?

    # merge adjacent strings back together
    merged = []
    output.each do |piece|
      tail = merged.last
      if piece.kind_of?(String) && tail && tail.kind_of?(String)
        tail << piece
      else
        merged << piece
      end
    end

    @children = merged
  end
end

218
lib/maruku/attributes.rb Normal file
View File

@ -0,0 +1,218 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
class String
  # Returns the +inspect+ form of the string (double-quoted, with escapes)
  # when it contains whitespace, a single quote or a double quote;
  # otherwise returns the string itself.
  def quote_if_needed
    return inspect if self =~ /[\s\'\"]/
    self
  end
end
module MaRuKu
  # Character placed right after the opening brace when an attribute list is
  # written out as text, e.g. "{:#id .cl}".
  MagicChar = ':'

  # Ordered list of [key, value] pairs describing element attributes.
  #
  # An attribute list such as
  #   {#id .cl key="val" ref}
  # is stored as
  #   [ [:id, 'id'], [:class, 'cl'], ['key', 'val'], [:ref, 'ref'] ]
  class AttributeList < Array
    # Entries are meant to be added through the typed push_* helpers only.
    private :push

    # Appends a key="val" pair.
    # Raises RuntimeError when +key+ is missing but +val+ is given.
    # NOTE(review): the original condition `not key and val` parses as
    # `(not key) and val`, so a pair with a nil value is accepted; that
    # behaviour is kept here -- confirm whether both were meant to be required.
    def push_key_val(key, val)
      raise "Bad #{key.inspect}=#{val.inspect}" if !key && val
      push [key, val]
    end

    # Appends a bare reference. Raises RuntimeError when +ref_id+ is nil/false.
    def push_ref(ref_id)
      raise "Bad :ref #{ref_id.inspect}" if not ref_id
      push [:ref, ref_id]
    end

    # Appends a .class entry. Raises RuntimeError when +val+ is nil/false.
    def push_class(val)
      # message used to say ":id" (copy-paste from push_id)
      raise "Bad :class #{val.inspect}" if not val
      push [:class, val]
    end

    # Appends an #id entry. Raises RuntimeError when +val+ is nil/false.
    def push_id(val)
      raise "Bad :id #{val.inspect}" if not val
      push [:id, val]
    end

    # Renders the list back to its textual form (without braces); values are
    # passed through String#quote_if_needed and joined by single spaces.
    def to_s
      map do |k, v|
        case k
        when :id;    "#" + v.quote_if_needed
        when :class; "." + v.quote_if_needed
        when :ref;   v.quote_if_needed
        else k.quote_if_needed + "=" + v.quote_if_needed
        end
      end.join(' ')
    end
    alias to_md to_s
  end
end
module MaRuKu; module In; module Markdown; module SpanLevelParser
# Builds the test table for attribute-list parsing.
#
# Returns an array of [source, expected, description] triples where:
# - source is the attribute list text wrapped as "{" + MagicChar + text + "}";
# - expected is either :throw or a one-element array holding the md_ial
#   element built from the expected [key, value] pairs;
# - description is "Attributes: " followed by the row comment.
#
# A row that omits `expected` and/or the comment inherits them from the
# closest preceding row that specified them (state is carried across rows
# in @expected / @comment / @count).
def unit_tests_for_attribute_lists
[
[ "", [], "Empty lists are allowed" ],
[ "=", :throw, "Bad char to begin a list with." ],
[ "a =b", :throw, "No whitespace before `=`." ],
[ "a= b", :throw, "No whitespace after `=`." ],
[ "a b c", [[:ref, 'a'],[:ref, 'b'],[:ref, 'c']], "More than one ref" ],
[ "hello notfound", [[:ref, 'hello'],[:ref, 'notfound']]],
[ "'a'", [[:ref, 'a']], "Quoted value." ],
[ '"a"' ],
[ "a=b", [['a','b']], "Simple key/val" ],
[ "'a'=b" ],
[ "'a'='b'" ],
[ "a='b'" ],
[ 'a="b\'"', [['a',"b\'"]], "Key/val with quotes" ],
[ 'a=b\''],
[ 'a="\\\'b\'"', [['a',"\'b\'"]], "Key/val with quotes" ],
['"', :throw, "Unclosed quotes"],
["'"],
["'a "],
['"a '],
[ "#a", [[:id, 'a']], "Simple ID" ],
[ "#'a'" ],
[ '#"a"' ],
[ "#", :throw, "Unfinished '#'." ],
[ ".", :throw, "Unfinished '.'." ],
[ "# a", :throw, "No white-space after '#'." ],
[ ". a", :throw, "No white-space after '.' ." ],
[ "a=b c=d", [['a','b'],['c','d']], "Tabbing" ],
[ " \ta=b \tc='d' "],
[ "\t a=b\t c='d'\t\t"],
[ ".\"a'", :throw, "Mixing quotes is bad." ],
].map { |s, expected, comment|
# Remember the last explicit expectation; rows without one reuse it.
@expected = (expected ||= @expected)
# Same for the comment. `last` is only assigned when the comment was
# inherited, which is what the next line uses to detect a repeat.
@comment = (comment ||= (last=@comment) )
# Inherited comment: append a running counter so descriptions stay
# distinct. Explicit comment: restart the counter at 1.
(comment == last && (comment += (@count+=1).to_s)) || @count = 1
# Expected pair lists are turned into the element the parser should produce.
expected = [md_ial(expected)] if expected.kind_of? Array
["{#{MagicChar}#{s}}", expected, "Attributes: #{comment}"]
}
end
# Wraps the given array of [key, value] pairs (empty by default) in an
# AttributeList.
def md_al(s = [])
  AttributeList.new(s)
end
# Reads an attribute list from +src+ until one of +break_on_chars+ (or the
# end of input) is reached.
#
# src::            character source positioned at the start of the list
# con::            current span context, only used for error reporting
# break_on_chars:: array of characters that terminate the list
#
# Always returns an AttributeList; when the input is malformed the problem
# is reported through maruku_error and the entries read so far are returned.
def read_attribute_list(src, con, break_on_chars)
  # An unquoted token also ends at `=`, space or tab.
  separators = break_on_chars + [?=, ?\s, ?\t]
  escaped = Maruku::EscapedCharInQuotes

  al = AttributeList.new
  while true
    src.consume_whitespace
    break if break_on_chars.include? src.cur_char

    case src.cur_char
    when nil
      maruku_error "Attribute list terminated by EOF:\n "+
        "#{al.inspect}" , src, con
      tell_user "I try to continue and return partial attribute list:\n"+
        al.inspect
      break
    when ?=
      # error: a token cannot start with `=`; report with position and skip it
      maruku_error "In attribute lists, cannot start identifier with `=`.",
        src, con
      tell_user "I try to continue"
      src.ignore_char
    when ?#
      # id definition
      src.ignore_char
      if id = read_quoted_or_unquoted(src, con, escaped, separators)
        al.push_id id
      else
        maruku_error 'Could not read `id` attribute.', src, con
        tell_user 'Trying to ignore bad `id` attribute.'
      end
    when ?.
      # class definition
      src.ignore_char
      if klass = read_quoted_or_unquoted(src, con, escaped, separators)
        al.push_class klass
      else
        maruku_error 'Could not read `class` attribute.', src, con
        tell_user 'Trying to ignore bad `class` attribute.'
      end
    else
      # either `key=value` or a bare reference
      if key = read_quoted_or_unquoted(src, con, escaped, separators)
        if src.cur_char == ?=
          src.ignore_char # skip the =
          if val = read_quoted_or_unquoted(src, con, escaped, separators)
            al.push_key_val(key, val)
          else
            maruku_error "Could not read value for key #{key.inspect}.",
              src, con
            tell_user "Ignoring key #{key.inspect}."
          end
        else
          al.push_ref key
        end
      else
        maruku_error 'Could not read key or reference.', src, con
      end
    end # case
  end # while true
  al
end
# Attaches every inline attribute list (IAL) found in +elements+ to a
# neighbouring element and then removes the IAL elements from the array.
#
# elements:: array of span elements (modified in place)
# src, con:: source and context, passed on to the error reporting helpers
#
# An IAL at index >= 1 is applied to the element before it when that is an
# MDElement, otherwise to the element after it; if neither neighbour is an
# MDElement an error is reported and the IAL is ignored. An IAL at index 0
# is not applied to anything by this method.
def merge_ial(elements, src, con)
# We need a helper
# NOTE(review): a `def` nested in a method body defines (and on every call
# redefines) is_ial as a regular method rather than a local helper.
def is_ial(e); e.kind_of? MDElement and e.node_type == :ial end
# Apply each IAL to the element before
elements.each_with_index do |e, i|
if is_ial(e) && i>= 1 then
before = elements[i-1]
after = elements[i+1]
if before.kind_of? MDElement
before.al = e.ial
elsif after.kind_of? MDElement
after.al = e.ial
else
maruku_error "I don't know who you are referring to:"+
" {#{e.ial.to_md}}", src, con
# xxx: report whether there is an empty element nearby
maruku_recover "Ignoring IAL: {#{e.ial.to_md}}", src, con
end
end
end
# Unless IALs are kept for debugging, drop them from the list. An IAL that
# equals the current first element is kept (the block returns nil for it).
if not Globals[:debug_keep_ials]
elements.delete_if {|x| is_ial(x) unless x == elements.first}
end
end
end end end end
#module MaRuKu; module In; module Markdown; module SpanLevelParser

52
lib/maruku/defaults.rb Normal file
View File

@ -0,0 +1,52 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu
  # Library-wide default settings, used when neither the element nor its
  # document overrides a value (see MDElement#get_setting).
  Globals = {
    :unsafe_features => false,
    :debug_keep_ials => false,
    :maruku_signature => false,
    :code_background_color => '#fef',
    :code_show_spaces => false,
    :html_math_engine => 'itex2mml', #ritex, itex2mml, none
    :html_use_syntax => false,
    :on_error => :warning
  }

  class MDElement
    # Resolves the setting +sym+ by looking, in order, at this element's
    # attributes, at the attributes of its document (when it has one) and
    # at MaRuKu::Globals. If the setting is unknown everywhere a message is
    # written to $stderr and nil is returned.
    def get_setting(sym)
      own = self.attributes
      return own[sym] if own.has_key?(sym)

      owner = self.doc
      return owner.attributes[sym] if owner && owner.attributes.has_key?(sym)

      return MaRuKu::Globals[sym] if MaRuKu::Globals.has_key?(sym)

      $stderr.puts "Bug: no default for #{sym.inspect}"
      nil
    end
  end
end

View File

@ -0,0 +1,92 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
#m Any method that detects a formatting error calls the
#m maruku_error() method, which looks up the :on_error setting:
#m
#m - :warning write to the standard error (or to the :error_stream
#m attribute, if defined), then do your best.
#m - :ignore be quiet and try to continue
#m - :raise raise a MaRuKu::Exception
#m
#m The default is :warning (see MaRuKu::Globals[:on_error]).
module MaRuKu
  # Raised by Errors#raise_error, i.e. when the :on_error policy is :raise.
  class Exception < RuntimeError
  end

  # Error-reporting helpers. The including object must provide
  # get_setting, attributes and add_tabs.
  module Errors
    # Reports a formatting error according to the :on_error setting:
    # :ignore does nothing, :raise raises MaRuKu::Exception, :warning
    # writes a framed message to the error stream. Any other policy is a
    # bug and raises a RuntimeError.
    def maruku_error(s,src=nil,con=nil)
      policy = get_setting(:on_error)
      if policy == :ignore
        nil
      elsif policy == :raise
        raise_error create_frame(describe_error(s,src,con))
      elsif policy == :warning
        tell_user create_frame(describe_error(s,src,con))
      else
        raise "BugBug: policy = #{policy.inspect}"
      end
    end

    # Tells the user how processing continues after an error; always
    # written to the error stream, whatever the policy.
    def maruku_recover(s,src=nil,con=nil)
      message = describe_error(s,src,con)
      tell_user create_frame(message)
    end

    alias error maruku_error

    # Raises MaRuKu::Exception with message +s+ and the caller's backtrace.
    def raise_error(s)
      raise MaRuKu::Exception, s, caller
    end

    # Appends +s+ to the :error_stream attribute, or to $stderr when that
    # attribute is not set.
    def tell_user(s)
      (self.attributes[:error_stream] || $stderr) << s
    end

    # Wraps +s+ in an ASCII frame followed by the first five frames of the
    # call stack.
    def create_frame(s)
      width = 75
      body = add_tabs(s,1,'| ')
      stack = add_tabs(caller[0, 5].join("\n"),1,'!')
      [
        "",
        " " + "_"*width,
        "| Maruku tells you:",
        "+" + "-"*width,
        body,
        "+" + "-"*width,
        stack,
        "\\" + "_"*width,
        ""
      ].join("\n")
    end

    # Returns +s+ followed by the descriptions of the source and of the
    # context, for those that were given.
    def describe_error(s,src,con)
      text = s
      text += "\n#{src.describe}\n" if src
      text += "\n#{con.describe}\n" if con
      text
    end
  end # Errors
end # MaRuKu

10
lib/maruku/ext/math.rb Normal file
View File

@ -0,0 +1,10 @@
require 'maruku/ext/math/elements'
require 'maruku/ext/math/parsing'
require 'maruku/ext/math/to_latex'
require 'maruku/ext/math/to_html'
require 'maruku/ext/math/mathml_engines/none'
require 'maruku/ext/math/mathml_engines/ritex'
require 'maruku/ext/math/mathml_engines/itex2mml'

View File

@ -0,0 +1,26 @@
module MaRuKu; class MDElement
  # Creates an :inline_math element holding the TeX source +math+.
  def md_inline_math(math)
    self.md_el(:inline_math, [], {:math=>math})
  end

  # Creates an :equation element for the TeX source +math+.
  #
  # A "\label{name}" inside +math+ takes precedence over the +label+
  # argument and is removed from the stored source. The string passed by
  # the caller is left untouched. When the equation is labelled and the
  # element belongs to a document, the equation gets the next number and is
  # registered in the document's eqid2eq table under its label.
  def md_equation(math, label=nil)
    reglabel= /\\label\{(\w+)\}/
    if math =~ reglabel
      label = $1
      # Work on a copy: callers may pass a frozen or shared string.
      math = math.gsub(reglabel,'')
    end
    num = nil
    if label && @doc # take number
      @doc.eqid2eq ||= {}
      num = @doc.eqid2eq.size + 1
    end
    e = self.md_el(:equation, [], {:math=>math, :label=>label,:num=>num})
    if label && @doc # register so that references can find it
      @doc.eqid2eq[label] = e
    end
    e
  end
end end

View File

@ -0,0 +1,35 @@
module MaRuKu; module Out; module HTML
  # Converts the TeX source +tex+ to MathML with the itex2MML parser.
  #
  # method:: filter to invoke on the parser (:inline_filter or :block_filter)
  #
  # Returns the root element of the parsed MathML, or nil after reporting
  # the problem through maruku_error when the library is missing, the
  # output cannot be parsed or the conversion fails for any other reason.
  def convert_to_mathml_itex2mml(tex, method)
    begin
      # The parser is loaded lazily and shared in a global.
      if not $itex2mml_parser
        require 'itextomml'
        $itex2mml_parser = Itex2MML::Parser.new
      end

      mathml = $itex2mml_parser.send(method, tex)
      doc = Document.new(mathml, {:respect_whitespace =>:all}).root
      return doc
    rescue LoadError => e
      maruku_error "Could not load package 'itex2mml'.\n"+
        "Please install it."
    rescue REXML::ParseException => e
      maruku_error "Invalid MathML TeX: \n#{add_tabs(tex,1,'tex>')}"+
        "\n\n #{e.inspect}"
    rescue => e
      # Bind the exception here too, otherwise the message shows "nil".
      maruku_error "Could not produce MathML TeX: \n#{tex}"+
        "\n\n #{e.inspect}"
    end
    nil
  end

  # Renders an inline formula with itex2MML.
  def to_html_inline_math_itex2mml
    convert_to_mathml_itex2mml(self.math, :inline_filter)
  end

  # Renders a displayed equation with itex2MML.
  def to_html_equation_itex2mml
    convert_to_mathml_itex2mml(self.math, :block_filter)
  end
end end end

View File

@ -0,0 +1,20 @@
module MaRuKu; module Out; module HTML
  # Fallback math "engine": shows the TeX source inside a <code> element.
  #
  # An engine method may either return a REXML::Element
  #    return Element.new 'div'
  # or return an empty array on error
  #    return []
  # or, as done here, have a string parsed by REXML.
  def to_html_inline_math_none
    # Escape a copy: this method can run more than once for the same
    # element, and escaping self.math in place would double-escape it.
    # '<' is escaped as well so that formulas such as "a < b" stay
    # well-formed XML.
    tex = self.math.gsub('&','&amp;').gsub('<','&lt;')
    mathml = "<code>#{tex}</code>"
    return Document.new(mathml).root
  end

  # Displayed equations are rendered exactly like inline formulas.
  def to_html_equation_none
    return to_html_inline_math_none
  end
end end end

View File

@ -0,0 +1,34 @@
module MaRuKu; module Out; module HTML
  # Converts the TeX source +tex+ to MathML with the Ritex parser, which is
  # loaded on first use and kept in a global.
  #
  # Returns the root element of the parsed MathML, or nil after reporting
  # through maruku_error when Ritex is missing or cannot parse the source.
  def convert_to_mathml_ritex(tex)
    $ritex_parser ||= begin
      require 'ritex'
      Ritex::Parser.new
    end

    markup = $ritex_parser.parse(tex.strip)
    Document.new(markup, {:respect_whitespace =>:all}).root
  rescue LoadError => e
    maruku_error "Could not load package 'ritex'.\n"+
      "Please install it using:\n"+
      " $ gem install ritex\n\n"+e.inspect
    nil
  rescue Racc::ParseError => e
    maruku_error "Could not parse TeX: \n#{tex}"+
      "\n\n #{e.inspect}"
    nil
  end

  # Renders an inline formula with Ritex; an empty array on failure.
  def to_html_inline_math_ritex
    convert_to_mathml_ritex(self.math) || []
  end

  # Renders a displayed equation with Ritex; an empty array on failure.
  def to_html_equation_ritex
    convert_to_mathml_ritex(self.math) || []
  end
end end end

View File

@ -0,0 +1,82 @@
module MaRuKu
  class MDDocument
    # Table of the labelled equations of the document: maps the equation
    # id (String) to the equation element (MDElement).
    attr_reader :eqid2eq
    attr_writer :eqid2eq
  end
end
# At least one slash inside
#RegInlineMath1 = /\$([^\$]*[\\][^\$]*)\$/
# No spaces around the delimiters
#RegInlineMath2 = /\$([^\s\$](?:[^\$]*[^\s\$])?)\$/
#RegInlineMath = Regexp::union(RegInlineMath1,RegInlineMath2)
# Everything goes; takes care of escaping the "\$" inside the expression
RegInlineMath = /\${1}((?:[^\$]|\\\$)+)\$/

# Span extension triggered by `$`: reads `$...$` from the source and pushes
# an inline math element on the context. The block answers false (nothing
# consumed) when the text at the current position is not a formula.
MaRuKu::In::Markdown.register_span_extension(
  :chars => ?$, :regexp => RegInlineMath) do |doc, src, con|
  match = src.read_regexp(RegInlineMath)
  if match
    formula = match.captures.compact.first
    con.push doc.md_inline_math(formula)
    true
  else
    false
  end
end
# A line that opens a displayed equation: up to three spaces of indent,
# then `\[` or `$$`; the rest of the line is captured.
EquationStart = /^[ ]{0,3}(?:\\\[|\$\$)(.*)$/
# Optional equation label written as "(name)" after the closing delimiter.
EqLabel = /(?:\((\w+)\))/
# An equation opened and closed on the same line: capture 1 is the TeX
# source, capture 2 the optional label.
OneLineEquation = /^[ ]{0,3}(?:\\\[|\$\$)(.*)(?:\\\]|\$\$)\s*#{EqLabel}?\s*$/
# A line that closes a multi-line equation: capture 1 is the text before
# the closing `\]` or `$$`, capture 2 the optional label.
EquationEnd = /^(.*)(?:\\\]|\$\$)\s*#{EqLabel}?\s*$/
# Block extension: consumes the lines of a displayed equation from +src+
# and pushes an equation element on +con+. Always answers true.
MaRuKu::In::Markdown::
register_block_extension(:regexp => EquationStart) do |doc, src, con|
# puts "Equation :#{self}"
first = src.shift_line
if first =~ OneLineEquation
# NOTE(review): `math` and `label` are assigned but the call below reads
# $1 and $2 directly.
math = $1
label = $2
con.push doc.md_equation($1, $2)
else
# Re-match so that $1 holds the text following the opening delimiter.
first =~ EquationStart
# NOTE(review): the remainder of the first line is not followed by a
# newline, so it is joined directly to the next line - confirm intended.
math = $1
label = nil
while true
if not src.cur_line
# NOTE(review): maruku_error and add_tabs are called without a receiver;
# they resolve against whatever `self` is where this block was created -
# confirm they are available there.
maruku_error "Stream finished while reading equation\n\n"+
add_tabs(math,1,'$> '), src, con
break
end
line = src.shift_line
if line =~ EquationEnd
math += $1 + "\n"
label = $2 if $2
break
else
math += line + "\n"
end
end
con.push doc.md_equation(math, label)
end
true
end
# This adds support for \eqref
# Reference written the LaTeX way: \eqref{name}
RegEqrefLatex = /\\eqref\{(\w+)\}/
# Reference written as (eq:name)
RegEqPar = /\(eq:(\w+)\)/
RegEqref = Regexp::union(RegEqrefLatex, RegEqPar)

# Span extension triggered by `\` or `(`: reads an equation reference and
# pushes an :eqref element carrying the referenced equation id. Like the
# inline math extension, it answers false without pushing anything when
# the reference cannot be read at the current position.
MaRuKu::In::Markdown::
register_span_extension(:chars => [?\\, ?(], :regexp => RegEqref) do |doc, src, con|
  if m = src.read_regexp(RegEqref)
    eqid = m.captures.compact.first
    con.push doc.md_el(:eqref, [], {:eqid=>eqid})
    true
  else
    false
  end
end

View File

@ -0,0 +1,107 @@
=begin maruku_doc
Attribute: html_math_engine
Scope: document, element
Output: html
Summary: Select the rendering engine for math.
Default: <?mrk Globals[:html_math_engine].to_s ?>
Select the rendering engine for math.
If you want to use your engine `foo`, then set:
HTML math engine: foo
{:lang=markdown}
and then implement two functions:
def to_html_inline_math_foo
# You can: either return a REXML::Element
# return Element.new 'div'
# or return an empty array on error
# return []
# or have a string parsed by REXML:
tex = self.math
tex.gsub!('&','&amp;')
mathml = "<code>#{tex}</code>"
return Document.new(mathml).root
end
def to_html_equation_foo
# same thing
...
end
{:lang=ruby}
=end
module MaRuKu; module Out; module HTML
  # Renders an inline formula with the engine selected by the
  # :html_math_engine setting (method to_html_inline_math_<engine>).
  # When the engine returns nil the TeX source is shown instead. If no
  # method exists for the engine a message is printed and [] is returned.
  def to_html_inline_math
    s = get_setting(:html_math_engine)
    method = "to_html_inline_math_#{s}".to_sym
    if self.respond_to? method
      # Parentheses matter: `send method || x` would parse as
      # `send(method || x)` and never fall back.
      self.send(method) || to_html_inline_math_none
    else
      puts "A method called #{method} should be defined."
      return []
    end
  end

  # Adds the class +cl+ to the `class` attribute of the element +el+,
  # keeping the classes that are already there.
  def add_class_to(el, cl)
    el.attributes['class'] =
      if already = el.attributes['class']
        already + " " + cl
      else
        cl
      end
  end

  # Renders a displayed equation as a div of class `maruku-equation`
  # containing the equation number (for labelled equations), the output of
  # the selected engine and a hidden copy of the TeX source. If no method
  # exists for the engine a message is printed and [] is returned.
  def to_html_equation
    s = get_setting(:html_math_engine)
    method = "to_html_equation_#{s}".to_sym
    if self.respond_to? method
      mathml = self.send(method) || to_html_equation_none
      div = create_html_element 'div'
      add_class_to(div, 'maruku-equation')
      if self.label # then numerate
        span = Element.new 'span'
        span.attributes['class'] = 'maruku-eq-number'
        num = self.num
        span << Text.new("(#{num})")
        div << span
        div.attributes['id'] = "eq:#{self.label}"
      end
      div << mathml

      # Keep the TeX source in the page, hidden.
      source_div = Element.new 'div'
      add_class_to(source_div, 'maruku-eq-tex')
      code = to_html_equation_none
      code.attributes['style'] = 'display: none'
      source_div << code
      div << source_div
      div
    else
      puts "A method called #{method} should be defined."
      return []
    end
  end

  # Renders a reference to an equation as a link showing its number.
  # An unknown equation id is reported through maruku_error and rendered
  # as the plain text "(id)".
  def to_html_eqref
    # The table only exists once a labelled equation has been created.
    known = self.doc.eqid2eq || {}
    if eq = known[self.eqid]
      num = eq.num
      a = Element.new 'a'
      a.attributes['class'] = 'maruku-eqref'
      a.attributes['href'] = "#eq:#{self.eqid}"
      a << Text.new("(#{num})")
      a
    else
      maruku_error "Cannot find equation #{self.eqid.inspect}"
      Text.new "(#{self.eqid})"
    end
  end
end end end

View File

@ -0,0 +1,21 @@
module MaRuKu; module Out; module Latex
  # Inline formula: the trimmed TeX source between dollar signs.
  def to_latex_inline_math
    tex = self.math.strip
    "$#{tex}$"
  end

  # Displayed equation: a numbered `equation` environment carrying the
  # label when the equation has one, a `displaymath` environment otherwise.
  def to_latex_equation
    unless self.label
      return "\\begin{displaymath}\n#{self.math.strip}\n\\end{displaymath}\n"
    end
    marker = "\\label{#{self.label}}"
    "\\begin{equation}\n#{self.math.strip}\n#{marker}\\end{equation}\n"
  end

  # Reference to a labelled equation.
  def to_latex_eqref
    target = self.eqid
    "\\eqref{#{target}}"
  end
end end end

259
lib/maruku/helpers.rb Normal file
View File

@ -0,0 +1,259 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
# A series of helper functions for creating elements: they hide the
# particular internal representation.
#
# Please, always use these instead of creating MDElement.
#
module MaRuKu
  # Factory methods for document elements. Each md_* method builds an
  # MDElement of one node type through md_el, which also links the element
  # to the current document (@doc).
  module Helpers

    # Generic constructor. If the first child is an inline attribute list
    # (:ial element) it is removed from +children+ and used as the
    # element's attribute list, appended to +al+ when one was given.
    def md_el(node_type, children=[], meta={}, al=nil)
      head = children.first
      if head.kind_of?(MDElement) && head.node_type == :ial
        al = al ? al + head.ial : head.ial
        children.shift
      end
      element = MDElement.new(node_type, children, meta, al)
      element.doc = @doc
      element
    end

    def md_header(level, children, al=nil)
      md_el(:header, children, { :level => level }, al)
    end

    # Inline code
    def md_code(code, al=nil)
      md_el(:inline_code, [], { :raw_code => code }, al)
    end

    # Code block
    def md_codeblock(source, al=nil)
      md_el(:code, [], { :raw_code => source }, al)
    end

    def md_quote(children, al=nil)
      md_el(:quote, children, {}, al)
    end

    def md_li(children, want_my_par, al=nil)
      md_el(:li, children, { :want_my_paragraph => want_my_par }, al)
    end

    def md_footnote(footnote_id, children, al=nil)
      md_el(:footnote, children, { :footnote_id => footnote_id }, al)
    end

    def md_abbr_def(abbr, text, al=nil)
      md_el(:abbr_def, [], { :abbr => abbr, :text => text }, al)
    end

    def md_abbr(abbr, title)
      md_el(:abbr, [abbr], { :title => title })
    end

    # Raw HTML block. The HTML is also parsed with REXML (wrapped in a
    # <marukuwrap> element) and stored in @parsed_html; if that fails the
    # element is returned without the parsed form.
    # NOTE(review): the +al+ argument is accepted but not used - confirm.
    def md_html(raw_html, al=nil)
      element = md_el(:raw_html, [], { :raw_html => raw_html })
      begin
        # remove newlines and whitespace at the beginning and at the
        # end of the string, or else REXML gets confused
        trimmed = raw_html.gsub(/\A\s*</,'<').
          gsub(/>[\s\n]*\Z/,'>')
        wrapped = "<marukuwrap>#{trimmed}</marukuwrap>"
        element.instance_variable_set :@parsed_html,
          REXML::Document.new(wrapped)
      rescue
        # malformed HTML: keep going without the parsed form
      end
      element
    end

    # Reference-style link; the reference id is stored in lower case.
    def md_link(children, ref_id, al=nil)
      md_el(:link, children, { :ref_id => ref_id.downcase }, al)
    end

    # Link with the URL given in place.
    def md_im_link(children, url, title=nil, al=nil)
      md_el(:im_link, children, { :url => url, :title => title }, al)
    end

    # Reference-style image.
    def md_image(children, ref_id, al=nil)
      md_el(:image, children, { :ref_id => ref_id }, al)
    end

    # Image with the URL given in place.
    def md_im_image(children, url, title=nil, al=nil)
      md_el(:im_image, children, { :url => url, :title => title }, al)
    end

    def md_em(children, al=nil)
      md_el(:emphasis, [children].flatten, {}, al)
    end

    def md_br()
      md_el(:linebreak, [], {}, nil)
    end

    def md_hrule()
      md_el(:hrule, [], {}, nil)
    end

    def md_strong(children, al=nil)
      md_el(:strong, [children].flatten, {}, al)
    end

    # Emphasis nested inside strong.
    def md_emstrong(children, al=nil)
      inner = md_em(children)
      md_strong(inner, al)
    end

    # <http://www.example.com/>
    def md_url(url, al=nil)
      md_el(:immediate_link, [], { :url => url }, al)
    end

    # <andrea@rubyforge.org>
    # <mailto:andrea@rubyforge.org>
    def md_email(email, al=nil)
      md_el(:email_address, [], { :email => email }, al)
    end

    def md_entity(entity_name, al=nil)
      md_el(:entity, [], { :entity_name => entity_name }, al)
    end

    # Markdown extra
    def md_foot_ref(ref_id, al=nil)
      md_el(:footnote_reference, [], { :footnote_id => ref_id }, al)
    end

    def md_par(children, al=nil)
      md_el(:paragraph, children, {}, al)
    end

    # [1]: http://url [properties]
    # The url, the id and (when given) the title are stored in +meta+.
    def md_ref_def(ref_id, url, title=nil, meta={}, al=nil)
      meta[:url] = url
      meta[:ref_id] = ref_id
      meta[:title] = title if title
      md_el(:ref_definition, [], meta, al)
    end

    # inline attribute list; a plain array is converted to an AttributeList
    def md_ial(al)
      unless al.kind_of?(Maruku::AttributeList)
        al = Maruku::AttributeList.new(al)
      end
      md_el(:ial, [], { :ial => al })
    end

    # Attribute list definition
    def md_ald(id, al)
      md_el(:ald, [], { :ald_id => id, :ald => al })
    end

    # Server directive <?target code... ?>
    def md_xml_instr(target, code)
      md_el(:xml_instr, [], { :target => target, :code => code })
    end

  end
end
module MaRuKu
  class MDElement
    # Returns the abbreviated form of the element: the call to the md_*
    # helper that would build it (this should be eval()uable to get the
    # document). A non-empty attribute list is appended as the last
    # argument. Returns nil for node types that have no abbreviated form.
    def inspect2
      call =
        case @node_type
        when :paragraph          then "md_par(%s)" % children_inspect
        when :footnote_reference then "md_foot_ref(%s)" % self.footnote_id.inspect
        when :entity             then "md_entity(%s)" % self.entity_name.inspect
        when :email_address      then "md_email(%s)" % self.email.inspect
        when :inline_code        then "md_code(%s)" % self.raw_code.inspect
        when :raw_html           then "md_html(%s)" % self.raw_html.inspect
        when :emphasis           then "md_em(%s)" % children_inspect
        when :strong             then "md_strong(%s)" % children_inspect
        when :immediate_link     then "md_url(%s)" % self.url.inspect
        when :image
          "md_image(%s, %s)" % [children_inspect, self.ref_id.inspect]
        when :im_image
          "md_im_image(%s, %s, %s)" %
            [children_inspect, self.url.inspect, self.title.inspect]
        when :link
          "md_link(%s,%s)" % [children_inspect, self.ref_id.inspect]
        when :im_link
          "md_im_link(%s, %s, %s)" %
            [children_inspect, self.url.inspect, self.title.inspect]
        when :ref_definition
          "md_ref_def(%s, %s, %s)" %
            [self.ref_id.inspect, self.url.inspect, self.title.inspect]
        when :ial                then "md_ial(%s)" % self.ial.inspect
        else
          return nil
        end

      has_attributes = @al && !@al.empty?
      # Replace the closing parenthesis with the attribute list argument.
      has_attributes ? call.chop + ", #{@al.inspect})" : call
    end
  end
end

View File

@ -0,0 +1,325 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu; module In; module Markdown; module SpanLevelParser
# The three scanner classes are declared empty here so that the CharSource
# constant can be bound to one of them right away; their bodies are
# defined further down in this file.

# a string scanner coded by me
class CharSourceManual; end
# a wrapper around StringScanner
class CharSourceStrscan; end
# A debug scanner that checks the correctness of both
# by comparing their output
class CharSourceDebug; end
# Choose!
# CharSource names the scanner implementation selected here; exactly one
# of the three assignments below is active.
CharSource = CharSourceManual # faster! 58ms vs. 65ms
#CharSource = CharSourceStrscan
#CharSource = CharSourceDebug
# Character source implemented by hand: a string buffer plus the index of
# the current character.
class CharSourceManual
  include MaRuKu::Strings
  include SpanLevelParser

  # s::      the text to scan (must be a String)
  # parent:: optional enclosing source, used only by #describe
  def initialize(s, parent=nil)
    raise "Passed #{s.class}" if not s.kind_of? String
    @buffer = s
    @buffer_index = 0
    @parent = parent
  end

  # Return current char as a FixNum (or nil).
  def cur_char; @buffer[@buffer_index] end

  # Return the next n chars as a String.
  def cur_chars(n); @buffer[@buffer_index,n] end

  # Return the char after current char as a FixNum (or nil).
  def next_char; @buffer[@buffer_index+1] end

  # Returns the current char and moves past it.
  def shift_char
    c = @buffer[@buffer_index]
    @buffer_index+=1
    c
  end

  # Moves past the current char.
  def ignore_char
    @buffer_index+=1
    nil
  end

  # Moves past the next n chars.
  def ignore_chars(n)
    @buffer_index+=n
    nil
  end

  # The part of the buffer that has not been consumed yet.
  def current_remaining_buffer
    @buffer[@buffer_index, @buffer.size-@buffer_index]
  end

  # True if the text at the current position starts with +string+.
  def cur_chars_are(string)
    cur_chars(string.size) == string
  end

  # True if the regexp +r+ matches at the current position.
  def next_matches(r)
    # \A, not ^: in Ruby ^ matches at the start of every line, which would
    # let the pattern match at the same offset inside a later line.
    r2 = /\A.{#{@buffer_index}}#{r}/m
    md = r2.match @buffer
    return !!md
  end

  # Like read_regexp, but matches against the whole buffer; the returned
  # MatchData therefore also covers the text before the current position.
  def read_regexp3(r)
    r2 = /\A.{#{@buffer_index}}#{r}/m
    m = r2.match @buffer
    if m
      consumed = m.to_s.size - @buffer_index
      ignore_chars consumed
    end
    m
  end

  # Matches the regexp +r+ at the current position. On success the matched
  # text is consumed and the MatchData returned; otherwise nil is returned
  # and the position does not change.
  def read_regexp(r)
    # Anchored with \A so that the match cannot start at a later line of
    # the remaining text (which would advance the index by the wrong amount).
    r2 = /\A#{r}/
    rest = current_remaining_buffer
    m = r2.match(rest)
    if m
      @buffer_index += m.to_s.size
    end
    return m
  end

  # Skips spaces and tabs (not newlines).
  def consume_whitespace
    while c = cur_char
      # Character literals compare correctly whether chars are integers
      # or one-character strings.
      if (c == ?\s || c == ?\t)
        ignore_char
      else
        break
      end
    end
  end

  # Appends to +out+ the run of ASCII letters at the current position and
  # moves past it.
  def read_text_chars(out)
    s = @buffer.size; c=nil
    while @buffer_index < s && (c=@buffer[@buffer_index]) &&
      ((c>=?a && c<=?z) || (c>=?A && c<=?Z))
      out << c
      @buffer_index += 1
    end
  end

  # Human-readable description of the current position, followed by the
  # description of the parent source when there is one.
  def describe
    s = describe_pos(@buffer, @buffer_index)
    if @parent
      s += "\n\n" + @parent.describe
    end
    s
  end
end
# Builds a multi-line, human-readable picture of position +buffer_index+
# inside +buffer+: a window of at most 75 characters around the position
# (newlines shown as N, tabs as T, "EOF" appended when the window reaches
# the end), a marker line pointing at the position, the byte range shown
# and finally the whole buffer indented with add_tabs.
def describe_pos(buffer, buffer_index)
  len = 75
  half = len/2
  num_before_max = buffer_index
  num_after_max = buffer.size-buffer_index

  # Start with half a window on each side, then let each side use the
  # room the other one could not fill.
  num_before = [half, num_before_max].min
  num_after = [half, num_after_max].min
  num_before = [num_before_max, len-num_after].min
  num_after = [num_after_max, len-num_before].min

  index_start = [buffer_index - num_before, 0].max
  index_end = [buffer_index + num_after, buffer.size].min

  size = index_end - index_start

  str = buffer[index_start, size]
  str.gsub!("\n",'N')
  str.gsub!("\t",'T')

  if index_end == buffer.size
    str += "EOF"
  end

  pre_s = buffer_index-index_start
  pre_s = [pre_s, 0].max
  pre_s2 = [len-pre_s,0].max
  pre =" "*(pre_s)

  "-"*len+"\n"+
  str + "\n" +
  "-"*pre_s + "|" + "-"*(pre_s2)+"\n"+
  pre + "+--- Byte #{buffer_index}\n"+

  # Report the end index of the window, not its length.
  "Shown bytes [#{index_start} to #{index_end}] of #{buffer.size}:\n"+
  add_tabs(buffer,1,">")
end
require 'strscan'
# Character source implemented on top of StringScanner. It offers the same
# interface as CharSourceManual so that either can be bound to CharSource.
class CharSourceStrscan
  include SpanLevelParser
  include MaRuKu::Strings

  # s::      the text to scan
  # parent:: optional enclosing source, used only by #describe (accepted
  #          for parity with CharSourceManual)
  def initialize(s, parent=nil)
    @s = StringScanner.new(s)
    @parent = parent
  end

  # Return current char as a FixNum (or nil).
  def cur_char
    @s.peek(1)[0]
  end

  # Return the next n chars as a String.
  def cur_chars(n);
    @s.peek(n)
  end

  # Return the char after current char as a FixNum (or nil).
  def next_char;
    @s.peek(2)[1]
  end

  # Returns the current char and moves past it; nil at the end of input.
  def shift_char
    byte = @s.get_byte
    byte && byte[0]
  end

  # Moves past the current char.
  def ignore_char
    @s.get_byte
    nil
  end

  # Moves past the next n chars.
  def ignore_chars(n)
    n.times do @s.get_byte end
    nil
  end

  # The part of the text that has not been consumed yet.
  def current_remaining_buffer
    @s.rest
  end

  # True if the text at the current position starts with +string+.
  def cur_chars_are(string)
    cur_chars(string.size) == string
  end

  # True if the regexp +r+ matches at the current position.
  def next_matches(r)
    len = @s.match?(r)
    return !!len
  end

  # Matches the regexp +r+ at the current position. On success the matched
  # text is consumed and a MatchData for it is returned; otherwise nil.
  def read_regexp(r)
    string = @s.scan(r)
    if string
      return r.match(string)
    else
      return nil
    end
  end

  # Skips spaces and tabs only, like CharSourceManual#consume_whitespace
  # (newlines are significant to the callers).
  def consume_whitespace
    @s.scan(/[ \t]+/)
    nil
  end

  # Human-readable description of the current position, followed by the
  # description of the parent source when there is one.
  def describe
    s = describe_pos(@s.string, @s.pos)
    if @parent
      s += "\n\n" + @parent.describe
    end
    s
  end
end
# Debugging aid: runs every call on both a CharSourceManual and a
# CharSourceStrscan, compares what they return and where they end up, and
# terminates the program with a report as soon as the two disagree.
class CharSourceDebug
  def initialize(s)
    @a = CharSourceManual.new(s)
    @b = CharSourceStrscan.new(s)
  end

  # Forwards the call to both scanners and returns the answer of the
  # manual one.
  def method_missing(methodname, *args)
    # Positions before the call, shown in the report for MatchData results.
    a_bef = @a.describe
    b_bef = @b.describe

    a = @a.send(methodname, *args)
    b = @b.send(methodname, *args)

    if a.kind_of?(MatchData)
      # MatchData objects are compared through their captures.
      if a.to_a != b.to_a
        puts "called: #{methodname}(#{args})"
        puts "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}"
        puts "AFTER: "+@a.describe
        puts "AFTER: "+@b.describe
        puts "BEFORE: "+a_bef
        puts "BEFORE: "+b_bef
        puts caller.join("\n")
        exit
      end
    elsif a != b
      # The message below is Italian for "Warning!".
      puts "called: #{methodname}(#{args})"
      puts "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}"
      puts ""+@a.describe
      puts ""+@b.describe
      puts caller.join("\n")
      exit
    end

    # Both scanners must now be at the same character
    # (the message is Italian for "out of sync after ...").
    if @a.cur_char != @b.cur_char
      puts "Fuori sincronia dopo #{methodname}(#{args})"
      puts ""+@a.describe
      puts ""+@b.describe
      exit
    end
    a
  end
end
end end end end

View File

@ -0,0 +1,68 @@
module MaRuKu; module In; module Markdown

  # Hash Fixnum -> name
  # (as filled by register_span_extension: trigger char -> array of
  # SpanExtension objects)
  SpanExtensionsTrigger = {}

  # A registered span-level extension.
  class SpanExtension
    # trigging chars
    attr_accessor :chars
    # trigging regexp
    attr_accessor :regexp
    # lambda
    attr_accessor :block
  end

  # Hash String -> Extension
  SpanExtensions = {}

  # Hash Regexp -> Block
  BlockExtensions = {}

  # Tries the span extensions registered for the current char of +src+.
  # An extension is called (with doc, src, con) only if its regexp matches
  # at the current position. Returns true as soon as one extension handles
  # the text, false if none does.
  def check_span_extensions(src, con)
    candidates = SpanExtensionsTrigger[src.cur_char] || []
    candidates.each do |ext|
      next unless ext.regexp && src.next_matches(ext.regexp)
      return true if ext.block.call(doc, src, con)
    end
    false # not special
  end

  # Registers a span extension.
  # args:: :chars (a char or an array of chars that trigger the extension)
  #        and :regexp (what must match at the trigger position)
  # The block receives (doc, src, con) and answers true if it consumed input.
  def self.register_span_extension(args, &block)
    e = SpanExtension.new
    e.chars = [*args[:chars]]
    e.regexp = args[:regexp]
    e.block = block
    e.chars.each do |c|
      SpanExtensionsTrigger[c] ||= []
      SpanExtensionsTrigger[c].push e
    end
  end

  # Registers a block extension for lines matching args[:regexp].
  # The block receives (doc, src, con) and answers true if it consumed input.
  def self.register_block_extension(args, &block)
    BlockExtensions[args[:regexp]] = block
  end

  # Calls the block extensions whose regexp matches +line+. Returns true as
  # soon as one of them handles the input, false if none does.
  def check_block_extensions(src, con, line)
    BlockExtensions.each do |reg, handler|
      next unless reg.match(line)
      return true if handler.call(doc, src, con)
    end
    false # not special
  end

  # Returns the MatchData of the first block extension regexp that matches
  # +line+, or false when none matches.
  def any_matching_block_extension?(line)
    BlockExtensions.each_key do |reg|
      found = reg.match(line)
      return found if found
    end
    false
  end

end end end

View File

@ -0,0 +1,144 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu; module In; module Markdown; module SpanLevelParser
# This class helps me read and sanitize HTML blocks
# I tried to do this with REXML, but wasn't able to. (suggestions?)
# Incremental reader for a block of raw HTML: it is fed one piece of text
# at a time through #eat_this and keeps track of the open tags and of
# comments, so that the caller can ask whether the block #is_finished?.
# `img` and `hr` tags are rewritten in self-closing form.
class HTMLHelper
  include MaRuKu::Strings

  # An opening or closing tag: 1 = "/" for a closing tag, 2 = tag name,
  # 3 = everything up to the closing ">".
  Tag = %r{^<(/)?(\w+)\s*([^>]*)>}m
  # A run of text without any "<".
  EverythingElse = %r{^[^<]+}m
  CommentStart = %r{^<!--}x
  CommentEnd = %r{^.*-->}
  # Tags that are always emitted as "<tag ... />".
  TO_SANITIZE = ['img','hr']

  # The text that has been given to the reader but not consumed yet.
  attr_reader :rest

  def initialize
    @rest = ""
    @tag_stack = []
    @m = nil
    @already = ""
    @inside_comment = false
  end

  # Consumes +line+ (together with any text left over from previous
  # calls). Reading stops when all the text has been consumed or, once at
  # least one tag or comment has been read, as soon as every tag is closed.
  # Malformed HTML is reported by raising through #error.
  def eat_this(line)
    @rest = line + @rest
    things_read = 0
    until @rest.empty?
      if @inside_comment
        if @m = CommentEnd.match(@rest)
          @inside_comment = false
          @already += @m.pre_match + @m.to_s
          @rest = @m.post_match
        elsif @m = EverythingElse.match(@rest)
          @already += @m.pre_match + @m.to_s
          @rest = @m.post_match
        else
          # The rest starts with "<" and holds no comment terminator yet,
          # so neither pattern applies. Take one character as comment text;
          # without this the loop would never make progress.
          @already += @rest[0, 1]
          @rest = @rest[1..-1]
        end
      else
        if @m = CommentStart.match(@rest)
          things_read += 1
          @inside_comment = true
          @already += @m.pre_match + @m.to_s
          @rest = @m.post_match
        elsif @m = Tag.match(@rest)
          things_read += 1
          @already += @m.pre_match
          @rest = @m.post_match

          is_closing = !!@m[1]
          tag = @m[2]
          attributes = @m[3]

          # A trailing "/" marks a self-closing tag.
          is_single = false
          if attributes =~ /\A(.*)\/\Z/
            attributes = $1
            is_single = true
          end

          if TO_SANITIZE.include? tag
            attributes.strip!
            if attributes.size > 0
              @already += '<%s %s />' % [tag, attributes]
            else
              @already += '<%s />' % [tag]
            end
          elsif is_closing
            @already += @m.to_s
            if @tag_stack.empty?
              error "Malformed: closing tag #{tag.inspect} "+
                "in empty list"
            end
            if @tag_stack.last != tag
              error "Malformed: tag <#{tag}> "+
                "closes <#{@tag_stack.last}>"
            end
            @tag_stack.pop
          elsif not is_single
            @tag_stack.push tag
            @already += @m.to_s
          end
        elsif @m = EverythingElse.match(@rest)
          @already += @m.pre_match + @m.to_s
          @rest = @m.post_match
        else
          error "Malformed HTML: not complete: #{@rest.inspect}"
        end
      end # not inside comment

      break if is_finished? and things_read>0
    end
  end

  # Raises an exception describing the problem and the reader state.
  def error(s)
    raise Exception, "Error: #{s} \n"+ inspect, caller
  end

  # Dump of the reader state, used in error messages.
  def inspect; "HTML READER\n comment=#{@inside_comment} "+
    "match=#{@m.to_s.inspect}\n"+
    "Tag stack = #{@tag_stack.inspect} \n"+
    "Before:\n"+
    add_tabs(@already,1,'|')+"\n"+
    "After:\n"+
    add_tabs(@rest,1,'|')+"\n"
  end

  # The (sanitized) HTML consumed so far.
  def stuff_you_read
    @already
  end

  # True when no comment and no tag is open.
  def is_finished?
    not @inside_comment and @tag_stack.empty?
  end
end # html helper
end end end end

View File

@ -0,0 +1,111 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu; module In; module Markdown; module BlockLevelParser
# This represents a source of lines that can be consumed.
#
# It is the twin of CharSource.
#
# A list of lines with a cursor on the current one.
class LineSource
  include MaRuKu::Strings

  # lines::         the lines to read (must not be nil)
  # parent::        optional LineSource these lines were taken from
  # parent_offset:: index in the parent that corresponds to the first of
  #                 these lines; used to compute original line numbers
  def initialize(lines, parent=nil, parent_offset=nil)
    raise "NIL lines? " if not lines
    @lines = lines
    @lines_index = 0
    @parent = parent
    @parent_offset = parent_offset
  end

  # The current line (nil when all lines have been consumed).
  def cur_line() @lines[@lines_index] end

  # The line after the current one (or nil).
  def next_line() @lines[@lines_index+1] end

  # Returns the current line and moves to the next one.
  # Raises if there are no lines left.
  def shift_line()
    raise "Over the rainbow" if @lines_index >= @lines.size
    l = @lines[@lines_index]
    @lines_index += 1
    return l
  end

  # Moves to the next line. Raises if there are no lines left.
  def ignore_line
    raise "Over the rainbow" if @lines_index >= @lines.size
    @lines_index += 1
  end

  # Human-readable description of the current position: the original line
  # number followed by up to three lines of context on each side, each
  # shown with its type and with an arrow on the current line.
  def describe
    s = "At line #{original_line_number(@lines_index)}\n"

    context = 3 # lines
    from = [@lines_index-context, 0].max
    to = [@lines_index+context, @lines.size-1].min

    (from..to).each do |i|
      prefix = (i == @lines_index) ? '--> ' : ' '
      l = @lines[i]
      # The line text is passed as an argument, never as part of the format
      # string: a '%' in the text would otherwise be read as a directive.
      s += "%10s %4s|%s" % [l.md_type.to_s, prefix, l]
      s += "|\n"
    end
    s
  end

  # Line number, in the outermost source, of the line at +index+.
  def original_line_number(index)
    if @parent
      return index + @parent.original_line_number(@parent_offset)
    else
      1 + index
    end
  end

  # Index of the current line.
  def cur_index
    @lines_index
  end

  # Returns the type of next line as a string
  # breaks at first :definition
  #
  # One letter per line, starting from the current one: "t" for text, "e"
  # for empty, "d" for definition, "o" for anything else. Scanning also
  # stops after the second empty line.
  def tell_me_the_future
    s = ""; num_e = 0
    (@lines_index..@lines.size-1).each do |i|
      c = case @lines[i].md_type
        when :text; "t"
        when :empty; num_e+=1; "e"
        when :definition; "d"
        else "o"
      end
      s += c
      break if c == "d" or num_e>1
    end
    s
  end
end # linesource
end end end end # block

View File

@ -0,0 +1,594 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu; module In; module Markdown; module BlockLevelParser
include Helpers
include MaRuKu::Strings
include MaRuKu::In::Markdown::SpanLevelParser
# Array of the block-level elements parsed so far.
class BlockContext < Array
  # Returns a string listing the +inspect+ of (at most) the last five
  # elements, one per line.
  def describe
    limit = 5
    listing = last(limit).map { |item| "\n -" + item.inspect }.join
    "Last #{limit} elements: " + listing
  end
end
# Splits the text into lines, wraps them in a LineSource and returns the
# result of parse_blocks on that source.
def parse_text_as_markdown(text)
  parse_blocks(LineSource.new(split_lines(text)))
end
# Parses the lines of a LineSource into block-level elements.
#
# src:: a LineSource positioned on the first line to parse.
#
# Returns a BlockContext (an Array subclass) with the parsed elements.
#
# The method works in three phases:
# 1. a loop dispatching on the md_type of the current line;
# 2. clean-up: IALs are merged (merge_ial), then :ial elements and the
#    :empty markers are removed;
# 3. lists and definition lists whose items do not want a paragraph get
#    their paragraph wrapper removed.
def parse_blocks(src)
output = BlockContext.new
# run state machine
while src.cur_line
# Give block extensions the first chance to handle the line.
next if check_block_extensions(src, output, src.cur_line)
# Prints detected type (useful for debugging)
# puts "#{src.cur_line.md_type}|#{src.cur_line}"
case src.cur_line.md_type
when :empty;
# Empty lines are kept as :empty markers until the clean-up phase.
output.push :empty
src.ignore_line
when :ial
m = InlineAttributeList.match src.shift_line
content = m[1] || ""
src2 = CharSource.new(content, src)
interpret_extension(src2, output, [nil])
when :ald
output.push read_ald(src)
when :text
# A text line can start a table, a setext-style header, a definition
# list or a plain paragraph; checked in this order.
if src.cur_line =~ MightBeTableHeader and
(src.next_line && src.next_line =~ TableSeparator)
output.push read_table(src)
# NOTE(review): src.next_line is nil on the last line; this relies on
# nil responding to md_type -- confirm it does.
elsif [:header1,:header2].include? src.next_line.md_type
output.push read_header12(src)
elsif eventually_comes_a_def_list(src)
definition = read_definition(src)
# Append to the definition list in progress, if there is one.
if output.last.kind_of?(MDElement) &&
output.last.node_type == :definition_list then
output.last.children << definition
else
output.push md_el(:definition_list, [definition])
end
else # Start of a paragraph
output.push read_paragraph(src)
end
when :header2, :hrule
# A :header2 line that reaches this point is handled like :hrule.
# hrule
src.shift_line
output.push md_hrule()
when :header3
output.push read_header3(src)
when :ulist, :olist
list_type = src.cur_line.md_type == :ulist ? :ul : :ol
li = read_list_item(src)
# append to current list if we have one
if output.last.kind_of?(MDElement) &&
output.last.node_type == list_type then
output.last.children << li
else
output.push md_el(list_type, [li])
end
when :quote; output.push read_quote(src)
when :code; e = read_code(src); output << e if e
when :raw_html; e = read_raw_html(src); output << e if e
when :footnote_text; output.push read_footnote_text(src)
when :ref_definition; output.push read_ref_definition(src)
when :abbreviation; output.push read_abbreviation(src)
when :xml_instr; read_xml_instruction(src, output)
when :metadata;
# Old-style metadata lines are reported and skipped.
maruku_error "Please use the new meta-data syntax: \n"+
" http://maruku.rubyforge.org/proposal.html\n", src
src.ignore_line
else # warn if we forgot something
md_type = src.cur_line.md_type
line = src.cur_line
maruku_error "Ignoring line '#{line}' type = #{md_type}", src
src.shift_line
end
end
merge_ial(output, src, output)
output.delete_if {|x| x.kind_of?(MDElement) &&
x.node_type == :ial}
# get rid of empty line markers
output.delete_if {|x| x == :empty}
# See for each list if we can omit the paragraphs and use li_span
# TODO: do this after
output.each do |c|
# Remove paragraphs that we can get rid of
if [:ul,:ol].include? c.node_type
if c.children.all? {|li| !li.want_my_paragraph} then
c.children.each do |d|
# Replace the item's children with those of its first child.
d.node_type = :li_span
d.children = d.children[0].children
end
end
end
if c.node_type == :definition_list
if c.children.all?{|defi| !defi.want_my_paragraph} then
c.children.each do |definition|
definition.definitions.each do |dd|
dd.children = dd.children[0].children
end
end
end
end
end
output
end
# Consumes one line matching AttributeDefinitionList, stores the parsed
# attribute list in self.ald under its id and returns the md_ald element.
# If the line does not match, reports an error and returns nil.
def read_ald(src)
  line = src.shift_line
  unless line =~ AttributeDefinitionList
    maruku_error "Bug Bug:\n#{line.inspect}"
    return nil
  end
  id, raw_list = $1, $2
  attributes = read_attribute_list(CharSource.new(raw_list,src), nil, [nil])
  self.ald[id] = attributes
  md_ald(id, attributes)
end
# Reads a two-line header (a text line followed by a :header1 or
# :header2 line) and returns the md_header element, level 1 or 2.
# With the new meta-data syntax enabled, a trailing "{...}" on the text
# line is parsed as an attribute list.
def read_header12(src)
  title = src.shift_line.strip
  attributes = nil
  # Check if there is an IAL
  if new_meta_data? && title =~ /^(.*)\{(.*)\}\s*$/
    title, raw_ial = $1.strip, $2
    attributes = read_attribute_list(CharSource.new(raw_ial,src), nil, [nil])
  end
  text = parse_lines_as_span [ title ]
  # The current line is now the underline; its type decides the level.
  level = (src.cur_line.md_type == :header2) ? 2 : 1
  src.shift_line
  md_header(level, text, attributes)
end
# Reads a one-line header like '#### header ####' and returns the
# md_header element; the level is the number of leading hashes.
# With the new meta-data syntax enabled, a trailing "{...}" is parsed as
# an attribute list.
def read_header3(src)
  title = src.shift_line.strip
  attributes = nil
  # Check if there is an IAL
  if new_meta_data? && title =~ /^(.*)\{(.*)\}\s*$/
    title, raw_ial = $1.strip, $2
    attributes = read_attribute_list(CharSource.new(raw_ial,src), nil, [nil])
  end
  level = num_leading_hashes(title)
  text = parse_lines_as_span [strip_hashes(title)]
  md_header(level, text, attributes)
end
# Reads a block-level XML processing instruction ("<?target code ?>"),
# possibly spanning several lines, and appends the result to +output+.
#
# src::    LineSource positioned on the line that opens the instruction.
# output:: array-like collection receiving the parsed element(s).
#
# * If the target is 'mrk' and MaRuKu::Globals[:unsafe_features] is set,
#   the code is executed through safe_execute_code and the elements it
#   returns are pushed (a String result raises "Not expected").
# * Otherwise an md_xml_instr element is pushed.
#
# An instruction that is never closed by "?>" is reported through
# maruku_error instead of reading past the end of the source.
def read_xml_instruction(src, output)
  m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line
  raise "BugBug" if not m
  target = m[2] || ''
  code = m[3]
  # Append lines until the terminator shows up -- or the source ends.
  while code !~ /\?>/ && src.cur_line
    code += "\n"+src.shift_line
  end
  if code !~ /\?>/
    maruku_error "Unclosed XML instruction (missing '?>'):\n"+
      add_tabs(code, 1, '|'), src
  elsif not code =~ (/\?>\s*$/)
    garbage = (/\?>(.*)$/.match(code))[1]
    maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n"+
      add_tabs(code, 1, '|'), src
  end
  code.gsub!(/\?>\s*$/, '')
  # NOTE(security): the 'mrk' branch runs code taken from the document;
  # it is only reachable when :unsafe_features is enabled.
  if target == 'mrk' && MaRuKu::Globals[:unsafe_features]
    result = safe_execute_code(self, code)
    if result
      if result.kind_of? String
        raise "Not expected"
      else
        output.push(*result)
      end
    end
  else
    output.push md_xml_instr(target, code)
  end
end
# Reads a block of raw HTML: lines are fed to an HTMLHelper until it
# reports completion or the source ends. Any exception raised while
# reading is reported through maruku_error. Returns an md_html element
# built from whatever the helper has read.
def read_raw_html(src)
  helper = HTMLHelper.new
  begin
    helper.eat_this(src.shift_line)
    while src.cur_line && !helper.is_finished?
      helper.eat_this("\n" + src.shift_line)
    end
  rescue Exception => e
    trace = e.inspect + e.backtrace.join("\n")
    maruku_error "Bad block-level HTML:\n#{add_tabs(trace,1,'|')}\n", src
  end
  md_html(helper.stuff_you_read)
end
# Collects the lines of one paragraph and returns the md_par element.
#
# The paragraph ends at the end of the source, or before a line that:
# * has one of the "hard stop" types listed below;
# * is a list item followed by another item of the same list type;
# * is blank;
# * is followed by a :header1/:header2 line (i.e. is a header title);
# * matches a block extension.
def read_paragraph(src)
  hard_stops = [:quote, :header3, :empty, :raw_html, :ref_definition, :ial, :xml_instr]
  list_types = [:olist, :ulist]
  lines = []
  while (line = src.cur_line)
    type = line.md_type
    break if hard_stops.include?(type)
    break if list_types.include?(type) && src.next_line.md_type == type
    break if line.strip.size == 0
    break if [:header1,:header2].include? src.next_line.md_type
    break if any_matching_block_extension?(line)
    lines << src.shift_line
  end
  md_par(parse_lines_as_span(lines, src))
end
# Reads one list item, either ordered or unordered, and returns the
# md_li element. The item body is parsed recursively with parse_blocks.
def read_list_item(src)
  start_index = src.cur_index
  kind = src.cur_line.md_type
  first = src.shift_line
  # Width of the marker plus the spaces before the first real character.
  indentation = spaces_before_first_char(first)
  lines, want_my_paragraph =
    read_indented_content(src, indentation, [:ulist, :olist, :ial], kind)
  # Put back the first line, without its list marker.
  lines.unshift first[indentation, first.size-1]
  children = parse_blocks(LineSource.new(lines, src, start_index))
  md_li(children, want_my_paragraph || (children.size>1))
end
# Consumes one abbreviation definition line, records it in
# self.abbreviations and returns the md_abbr_def element.
# Errors are reported (not raised here) when the line does not match
# Abbreviation or when the abbreviation itself is missing or empty.
def read_abbreviation(src)
  line = src.shift_line
  unless line =~ Abbreviation
    maruku_error "Bug: it's Andrea's fault. Tell him.\n#{line.inspect}"
  end
  abbr, desc = $1, $2
  if !abbr || abbr.size==0
    maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
  end
  self.abbreviations[abbr] = desc
  md_abbr_def(abbr, desc)
end
# Reads a footnote definition: the first line must match FootnoteText,
# continuation lines are collected with a fixed indentation of 4.
# The body is parsed with parse_blocks; the resulting md_footnote element
# is stored in self.footnotes under its id and returned.
def read_footnote_text(src)
  start_index = src.cur_index
  first = src.shift_line
  maruku_error "Bug (it's Andrea's fault)" unless first =~ FootnoteText
  id, text = $1, $2
  indentation = 4 #first.size-text.size
  lines, _want_my_paragraph =
    read_indented_content(src, indentation, [:footnote_text], :footnote_text)
  # The text on the first line, if any, becomes the first body line.
  lines.unshift text if text && text.strip != ""
  children = parse_blocks(LineSource.new(lines, src, start_index))
  footnote = md_footnote(id, children)
  self.footnotes[id] = footnote
  footnote
end
# This is the only ugly function in the code base.
# It is used to read list items, descriptions, footnote text
#
# src::         LineSource positioned just after the item's first line.
# indentation:: number of leading characters stripped from each line
#               (via strip_indent).
# break_list::  line types that end the item, as long as no empty line
#               has been seen yet.
# item_type::   type of the item being read; used only to decide
#               want_my_paragraph.
#
# Returns [lines, want_my_paragraph]: the collected lines (trailing empty
# lines removed) and a flag that is true when content was accepted after
# an empty line, or when an empty line was seen and the line left under
# the cursor has type item_type.
def read_indented_content(src, indentation, break_list, item_type)
lines =[]
# collect all indented lines
saw_empty = false; saw_anything_after = false
while src.cur_line
#puts "#{src.cur_line.md_type} #{src.cur_line.inspect}"
if src.cur_line.md_type == :empty
saw_empty = true
lines << src.shift_line
next
end
# after a white line
if saw_empty
# we expect things to be properly aligned
if (ns=number_of_leading_spaces(src.cur_line)) < indentation
#puts "breaking for spaces, only #{ns}: #{src.cur_line}"
break
end
saw_anything_after = true
else
# Before any empty line, only the types in break_list end the item.
break if break_list.include? src.cur_line.md_type
# break if src.cur_line.md_type != :text
end
stripped = strip_indent(src.shift_line, indentation)
lines << stripped
#puts "Accepted as #{stripped.inspect}"
# You are only required to indent the first line of
# a child paragraph.
if stripped.md_type == :text
while src.cur_line && (src.cur_line.md_type == :text)
lines << strip_indent(src.shift_line, indentation)
end
end
end
want_my_paragraph = saw_anything_after ||
(saw_empty && (src.cur_line && (src.cur_line.md_type == item_type)))
# dbg_describe_ary(lines, 'LI')
# create a new context
# Drop the empty lines collected at the end of the item.
while lines.last && (lines.last.md_type == :empty)
lines.pop
end
return lines, want_my_paragraph
end
# Reads consecutive :quote lines, removes their quote marker (unquote)
# and parses the result recursively. Returns the md_quote element.
def read_quote(src)
  start_index = src.cur_index
  quoted = []
  while src.cur_line && src.cur_line.md_type == :quote
    quoted << unquote(src.shift_line)
  end
  md_quote(parse_blocks(LineSource.new(quoted, src, start_index)))
end
# Reads an indented code block: consecutive :code and :empty lines, with
# 4 characters of indentation stripped. Blank lines at both ends are
# dropped. Returns an md_codeblock element, or nil if nothing is left.
def read_code(src)
  lines = []
  while (line = src.cur_line) && [:code, :empty].include?(line.md_type)
    lines << strip_indent(src.shift_line, 4)
  end
  lines.pop while lines.last && lines.last.strip.size == 0
  lines.shift while lines.first && lines.first.strip.size == 0
  return nil if lines.empty?
  md_codeblock(lines.join("\n"))
end
# Reads a series of :metadata lines, skipping the empty lines in between,
# and returns the merged Hash produced by parse_metadata for each line.
# Stops at the first line of any other type.
def read_metadata(src)
  collected = {}
  while (line = src.cur_line)
    type = line.md_type
    if type == :empty
      src.shift_line
    elsif type == :metadata
      collected.merge! parse_metadata(src.shift_line)
    else
      break
    end
  end
  collected
end
# Reads a link reference definition and records it in self.refs under
# the stripped, downcased id. Returns the md_ref_def element.
#
# A following line indented by 1 to 3 spaces (and not itself a reference
# definition) is treated as the continuation of the definition.
# Extra "key=value" pairs captured by LinkRegex are stored in the same
# hash as :url and :title.
def read_ref_definition(src)
  line = src.shift_line
  # if link is incomplete, shift next line
  follow = src.cur_line
  if follow && (follow.md_type != :ref_definition) &&
     ([1,2,3].include? number_of_leading_spaces(follow))
    line += " "+ src.shift_line
  end
  match = LinkRegex.match(line)
  error "Link does not respect format: '#{line}'" if not match
  id = match[1].strip.downcase
  url, title = match[2], match[3]
  hash = self.refs[id] = {:url=>url,:title=>title}
  if (stuff = match[4])
    stuff.split.each do |couple|
      k, v = couple.split('=')
      v ||= ""
      # Drop the surrounding double quotes of a quoted value.
      v = v[1, v.size-2] if v[0,1]=='"'
      hash[k.to_sym] = v
    end
  end
  md_ref_def(id, url, {:title=>title})
end
# Splits one table line on '|' and returns the trimmed, non-empty cells.
def split_cells(s)
  s.strip.split('|').select{|x|x.strip.size>0}.map{|x|x.strip}
end

# Reads a table: a header line, a separator line, then every following
# line containing '|'.
#
# Returns an md_el(:table) whose children are the head cells followed by
# the cells of every row (flattened), with the column alignments in the
# :align attribute. The alignment of a column is decided by the two
# captures of Sep on its separator cell: both set => :center, only the
# second => :right, otherwise :left.
#
# If the header or any row does not have as many cells as the separator
# line, the problem is reported and md_br() is returned instead.
def read_table(src)
  head = split_cells(src.shift_line).map{|s| md_el(:head_cell, parse_lines_as_span([s])) }
  separator = split_cells(src.shift_line)
  align = separator.map do |s|
    s =~ Sep
    if $1 and $2 then :center elsif $2 then :right else :left end
  end
  num_columns = align.size
  if head.size != num_columns
    maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
    tell_user "I will ignore this table."
    # XXX try to recover
    return md_br()
  end
  rows = []
  while src.cur_line && src.cur_line =~ /\|/
    row = split_cells(src.shift_line).map{|s|
      md_el(:cell, parse_lines_as_span([s]))}
    # Check the row itself (the header was already validated above).
    if row.size != num_columns
      maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}"
      tell_user "I will ignore this table."
      # XXX try to recover
      return md_br()
    end
    rows << row
  end
  children = (head+rows).flatten
  return md_el(:table, children, {:align => align})
end
# Tells whether a definition list starts at the current line: the types
# of the upcoming lines (see tell_me_the_future) must be one or more
# text lines, at most one empty line, then a definition.
# Returns the match position (0) when it does, nil otherwise.
def eventually_comes_a_def_list(src)
  src.tell_me_the_future =~ %r{^t+e?d}x
end
# Reads one entry of a definition list: one or more term lines, an
# optional empty line, then one or more definitions.
#
# Returns an md_el(:definition) whose children are the terms followed by
# the definitions; :terms, :definitions and :want_my_paragraph are also
# available as attributes. want_my_paragraph is true when an empty line
# separates terms from definitions, or when read_indented_content asks
# for it on any definition.
#
# Raises "Chunky Bacon!" if the source ends after the terms, or if the
# line after the terms is not a definition.
def read_definition(src)
  # Read one or more terms
  terms = []
  while src.cur_line && src.cur_line.md_type == :text
    terms << md_el(:definition_term, parse_lines_as_span([src.shift_line]))
  end
  raise "Chunky Bacon!" if not src.cur_line
  # one optional empty
  want_my_paragraph = (src.cur_line.md_type == :empty)
  src.shift_line if want_my_paragraph
  raise "Chunky Bacon!" if src.cur_line.md_type != :definition
  # Read one or more definitions
  definitions = []
  while src.cur_line && src.cur_line.md_type == :definition
    start_index = src.cur_index
    first = src.shift_line
    first =~ Definition
    first = $1
    lines, nested_wants =
      read_indented_content(src,4, [:definition], :definition)
    want_my_paragraph ||= nested_wants
    lines.unshift first
    body = parse_blocks(LineSource.new(lines, src, start_index))
    definitions << md_el(:definition_data, body)
  end
  md_el(:definition, terms+definitions, {
    :terms => terms,
    :definitions => definitions,
    :want_my_paragraph => want_my_paragraph})
end
end # BlockLevelParser
end # MaRuKu
end
end

View File

@ -0,0 +1,225 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'iconv'
module MaRuKu; module In; module Markdown; module BlockLevelParser
# Parses a whole document given as a String and fills this object:
#
# 1. parse_email_headers splits the headers from the body (:data); the
#    headers are merged into self.attributes;
# 2. if an `encoding` attribute other than UTF-8 is given, the body is
#    converted to UTF-8 with Iconv;
# 3. the body is parsed into @children, abbreviations are searched and
#    Markdown inside raw HTML is substituted;
# 4. the table of contents is created and stored in self.toc; the
#    :title attribute defaults to the TOC's header element;
# 5. attribute lists are expanded on every element;
# 6. if MaRuKu::Globals[:unsafe_features] is set, embedded code blocks
#    are executed.
def parse_doc(s)
  meta2 = parse_email_headers(s)
  data = meta2[:data]
  meta2.delete :data
  self.attributes.merge! meta2
=begin maruku_doc
Attribute: encoding
Scope: document
Summary: Encoding for the document.
If the `encoding` attribute is specified, then the content
will be converted from the specified encoding to UTF-8.
Conversion happens using the `iconv` library.
=end
  enc = self.attributes[:encoding]
  self.attributes.delete :encoding
  if enc && enc.downcase != 'utf-8'
    data = Iconv.new('utf-8', enc).iconv(data)
  end
  @children = parse_text_as_markdown(data)
  # These two steps used to be guarded by markdown_extra?; they now
  # always run.
  self.search_abbreviations
  self.substitute_markdown_inside_raw_html
  toc = create_toc
  # use title if not set
  if not self.attributes[:title] and toc.header_element
    self.attributes[:title] = toc.header_element.to_s
  end
  # save for later use
  self.toc = toc
  # Now do the attributes magic
  each_element do |e|
    # default attribute list
    if default = self.ald[e.node_type.to_s]
      expand_attribute_list(default, e.attributes)
    end
    expand_attribute_list(e.al, e.attributes)
  end
=begin maruku_doc
Attribute: unsafe_features
Scope: global
Summary: Enables execution of XML instructions.
Disabled by default because of security concerns.
=end
  # Same constant path as the one used by read_xml_instruction.
  if MaRuKu::Globals[:unsafe_features]
    self.execute_code_blocks
    # TODO: remove executed code blocks
  end
end
# Expands an attribute list into the Hash +result+.
#
# al::     enumerable of [key, value] pairs.
# result:: Hash updated in place.
#
# * :class values are accumulated, separated by a space;
# * :id overwrites any previous id;
# * :ref looks the label up in self.ald and expands it recursively;
#   labels already expanded are tracked in result[:expanded_references]
#   and a repeated label is reported as a circular reference. A label
#   that is not defined is appended to result[:unresolved_references]
#   and also set as a boolean attribute;
# * any other key is stored under its symbol.
def expand_attribute_list(al, result)
  al.each do |key, value|
    if key == :class
      if result[:class]
        result[:class] += " " + value
      else
        result[:class] = value
      end
    elsif key == :id
      result[:id] = value
    elsif key == :ref
      target = self.ald[value]
      if target
        seen = (result[:expanded_references] ||= [])
        if seen.include?(value)
          seen.push value
          maruku_error "Circular reference between labels.\n\n"+
            "Label #{value.inspect} calls itself via recursion.\nThe recursion is "+
            (seen.map{|x| x.inspect}.join(' => '))
        else
          seen.push value
          expand_attribute_list(target, result)
        end
      else
        if result[:unresolved_references]
          result[:unresolved_references] << " #{value}"
        else
          result[:unresolved_references] = value
        end
        result[value.to_sym] = true
      end
    else
      result[key.to_sym] = value
    end
  end
end
# Evaluates +code+ in the context of +object+ (instance_eval) and returns
# its value. If the code raises a StandardError or a ScriptError (which
# includes SyntaxError), the failure is reported through maruku_error,
# with the offending code and the backtrace, and nil is returned.
#
# Signals and exit requests (Interrupt, SystemExit, ...) are deliberately
# not intercepted.
#
# NOTE(security): this runs arbitrary Ruby code; the callers visible in
# this file only reach it when :unsafe_features is enabled.
def safe_execute_code(object, code)
  begin
    return object.instance_eval(code)
  rescue StandardError, ScriptError => e
    # Exception#backtrace is the public API for the stack trace
    # (Kernel#caller is private and cannot be called on the exception).
    trace = (e.backtrace || []).join("\n")
    maruku_error "Exception while executing this:\n"+
      add_tabs(code, 1, ">")+
      "\nThe error was:\n"+
      add_tabs(e.inspect+"\n"+trace, 1, "|")
  end
  nil
end
# Executes every :xml_instr element whose target is 'maruku', evaluating
# its code in the context of the element itself (safe_execute_code).
# A String result is printed to standard output.
def execute_code_blocks
  self.each_element(:xml_instr) do |instr|
    next unless instr.target == 'maruku'
    outcome = safe_execute_code(instr, instr.code)
    puts "Result is : #{outcome.inspect}" if outcome.kind_of?(String)
  end
end
# For each known abbreviation, rewrites the document strings through
# replace_each_string: a string containing the abbreviation is replaced
# by [text before, md_abbr element, text after]; other strings are
# returned unchanged. Only the first occurrence in each string is matched
# per call of the block.
def search_abbreviations
  self.abbreviations.each do |abbrev, title|
    pattern = Regexp.new(Regexp.escape(abbrev))
    self.replace_each_string do |text|
      found = pattern.match(text)
      if found
        marker = md_abbr(abbrev.dup, title ? title.dup : nil)
        [found.pre_match, marker, found.post_match]
      else
        text
      end
    end
  end
end
include REXML
# (PHP Markdown extra) Search for elements that have
# markdown=1 or markdown=block defined
#
# For every :raw_html element that carries a parsed document
# (@parsed_html), each HTML node with a `markdown` attribute has the
# attribute removed and its non-blank text nodes replaced by the HTML of
# their Markdown rendering: block-level when markdown="block" or the tag
# is listed in block_tags, span-level otherwise.
def substitute_markdown_inside_raw_html
  self.each_element(:raw_html) do |raw|
    doc = raw.instance_variable_get :@parsed_html
    next unless doc # only valid html
    # parse block-level markdown elements in these HTML tags
    block_tags = ['div']
    # use xpath to find elements with 'markdown' attribute
    XPath.match(doc, "//*[attribute::markdown]" ).each do |node|
      # should we parse block-level or span-level?
      block_level = (node.attributes['markdown'] == 'block') ||
                    block_tags.include?(node.name)
      # remove 'markdown' attribute
      node.delete_attribute 'markdown'
      # NOTE(review): "//text()" starts with "//"; confirm whether this is
      # meant to be limited to the text nodes below +node+.
      XPath.match(node, "//text()" ).each do |original_text|
        s = original_text.value.strip
        next unless s.size > 0
        rendered = block_level ? parse_text_as_markdown(s) :
                                 parse_lines_as_span([s])
        el = md_el(:dummy, rendered)
        holder = original_text.parent
        el.children_to_html.each do |x|
          holder.insert_before(original_text, x)
        end
        holder.delete(original_text)
      end
    end
  end
end
end end end end

View File

@ -0,0 +1,692 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'set'
module MaRuKu; module In; module Markdown; module SpanLevelParser
include MaRuKu::Helpers
# Characters that may follow a backslash to form an escape sequence in
# ordinary span text (passed as the `escaped` argument of read_span).
EscapedCharInText =
Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>]
# Same set plus the two quote characters; used when reading quoted
# values (see read_quoted).
EscapedCharInQuotes =
Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>,?',?"]
# Backslash and backtick.
# NOTE(review): not referenced in the part of the file visible here.
EscapedCharInInlineCode = [?\\,?`]
# Joins the lines with newlines and parses the result as span-level
# Markdown (see parse_span_better).
def parse_lines_as_span(lines, parent=nil)
  joined = lines.join("\n")
  parse_span_better(joined, parent)
end
# Parses a String as span-level Markdown and returns what read_span
# returns. The parser works on a frozen copy of the string, so the
# caller's string is left untouched. A non-String argument is reported
# through +error+.
def parse_span_better(string, parent=nil)
  error "Passed #{string.class}." unless string.kind_of?(String)
  frozen_copy = (string + "").freeze
  read_span(CharSource.new(frozen_copy, parent), EscapedCharInText, [nil])
end
# This is the main loop for reading span elements
#
# It's long, but not *complex* or difficult to understand.
#
# src::             CharSource to read from.
# escaped::         characters that a backslash turns into literals.
# exit_on_chars::   stop (without consuming) when the current character
#                   is in this list; may be nil. Use nil as an element to
#                   stop at the end of the input.
# exit_on_strings:: stop (without consuming) when the input continues
#                   with one of these strings; may be nil.
#
# Returns the result of +educate+ applied to the elements collected in a
# SpanContext, after IALs have been merged and one leading and one
# trailing space removed.
def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
con = SpanContext.new
c = d = nil
while true
c = src.cur_char
# This is only an optimization which cuts 50% of the time used.
# (but you can't use a-zA-z in exit_on_chars)
if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
con.cur_string << src.shift_char
next
end
break if exit_on_chars && exit_on_chars.include?(c)
break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}
# check if there are extensions
if check_span_extensions(src, con)
next
end
case c = src.cur_char
when ?\ # it's space (32)
# NOTE(review): the string below is 2 characters long as written but 3
# characters are skipped; the hard-break marker may have lost a space
# -- confirm against the upstream file.
if src.cur_chars_are " \n"
src.ignore_chars(3)
con.push_element md_br()
next
else
src.ignore_char
con.push_space
end
when ?\n, ?\t
# Newlines and tabs are pushed as spaces.
src.ignore_char
con.push_space
when ?`
read_inline_code(src,con)
when ?<
# It could be:
# 1) HTML "<div ..."
# 2) HTML "<!-- ..."
# 3) url "<http:// ", "<ftp:// ..."
# 4) email "<andrea@... ", "<mailto:andrea@..."
# 5) on itself! "a < b "
# 6) Start of <<guillemettes>>
case d = src.next_char
when ?<; # guillemettes
src.ignore_chars(2)
con.push_char ?<
con.push_char ?<
when ?!;
if src.cur_chars_are '<!--'
read_inline_html(src, con)
else
con.push_char src.shift_char
end
when ??
read_xml_instr_span(src, con)
when ?\ , ?\t
con.push_char src.shift_char
else
if src.next_matches(/<mailto:/) or
src.next_matches(/<[\w\.]+\@/)
read_email_el(src, con)
elsif src.next_matches(/<\w+:/)
read_url_el(src, con)
elsif src.next_matches(/<\w/)
#puts "This is HTML: #{src.cur_chars(20)}"
read_inline_html(src, con)
else
#puts "This is NOT HTML: #{src.cur_chars(20)}"
con.push_char src.shift_char
end
end
when ?\\
# Backslash: \' and \" become entities, characters in +escaped+ become
# literals, anything else leaves the backslash in the text.
d = src.next_char
if d == ?'
src.ignore_chars(2)
con.push_element md_entity('apos')
elsif d == ?"
src.ignore_chars(2)
con.push_element md_entity('quot')
elsif escaped.include? d
src.ignore_chars(2)
con.push_char d
else
con.push_char src.shift_char
end
when ?[
if markdown_extra? && src.next_char == ?^
read_footnote_ref(src,con)
else
read_link(src, con)
end
when ?!
if src.next_char == ?[
read_image(src, con)
else
con.push_char src.shift_char
end
when ?&
if m = src.read_regexp(/\&([\w\d]+);/)
con.push_element md_entity(m[1])
else
con.push_char src.shift_char
end
when ?*
if not src.next_char
maruku_error "Opening * as last char.", src, con
maruku_recover "Threating as literal"
con.push_char src.shift_char
else
# Emphasis opens only if the run of stars is followed by a character
# that is neither whitespace nor another star.
follows = src.cur_chars(4)
if follows =~ /^\*\*\*[^\s\*]/
con.push_element read_emstrong(src,'***')
elsif follows =~ /^\*\*[^\s\*]/
con.push_element read_strong(src,'**')
elsif follows =~ /^\*[^\s\*]/
con.push_element read_em(src,'*')
else # * is just a normal char
con.push_char src.shift_char
end
end
when ?_
if not src.next_char
maruku_error "Opening _ as last char", src, con
maruku_recover "Threating as literal", src, con
con.push_char src.shift_char
else
follows = src.cur_chars(4)
if follows =~ /^\_\_\_[^\s\_]/
con.push_element read_emstrong(src,'___')
elsif follows =~ /^\_\_[^\s\_]/
con.push_element read_strong(src,'__')
elsif follows =~ /^\_[^\s\_]/
con.push_element read_em(src,'_')
else # _ is just a normal char
con.push_char src.shift_char
end
end
when ?{ # extension
src.ignore_char # {
interpret_extension(src, con, [?}])
src.ignore_char # }
when nil
# End of input reached before any exit condition was met.
maruku_error ("Unclosed span (waiting for %s"+
"#{exit_on_strings.inspect})") % [
exit_on_chars ? "#{exit_on_chars.inspect} or" : ""],
src,con
break
else # normal text
con.push_char src.shift_char
end # end case
end # end while true
con.push_string_if_present
# Assign IAL to elements
merge_ial(con.elements, src, con)
# Remove leading space
if (s = con.elements.first).kind_of? String
if s[0] == ?\ then con.elements[0] = s[1, s.size-1] end
con.elements.shift if s.size == 0
end
# Remove final spaces
if (s = con.elements.last).kind_of? String
# NOTE(review): the character literal on the next line ends with a bare
# backslash; it presumably was a backslash followed by a space (compare
# the leading-space case above) -- confirm against the upstream file.
s.chop! if s[-1] == ?\
con.elements.pop if s.size == 0
end
educated = educate(con.elements)
educated
end
# Reads an inline XML instruction "<?target code ?>" and pushes the
# corresponding md_xml_instr element on +con+. The target is the word
# following "<?" (empty string if there is none); the code is everything
# up to "?>", stripped.
def read_xml_instr_span(src, con)
  src.ignore_chars(2) # starting <?
  # read target <?target code... ?>
  m = src.read_regexp(/(\w+)/)
  target = m ? m[1] : ''
  delim = "?>"
  code = read_simple(src, [], [], [delim])
  src.ignore_chars delim.size
  con.push_element md_xml_instr(target, (code || "").strip)
end
# Interprets the content of a "{...}" extension.
#
# Start: cursor on character **after** '{'
# End: cursor on '}' or EOF
#
# Content starting with ':', '#' or '.' is handed to extension_meta
# (the ':' is skipped first). For anything else the text up to
# break_on_chars is read and discarded, a recovery notice is emitted,
# and extension_meta is then called on what follows.
def interpret_extension(src, con, break_on_chars)
  case src.cur_char
  when ?:
    src.ignore_char # :
    return extension_meta(src, con, break_on_chars)
  when ?#, ?.
    return extension_meta(src, con, break_on_chars)
  end
  stuff = read_simple(src, [?}], break_on_chars, [])
  notice = "I will threat this:\n\t{#{stuff}} \n as meta-data.\n"
  if stuff =~ /^(\w+\s|[^\w])/
    # No extension ids are known at this point, so every id is reported.
    notice = "I don't know what to do with extension '#{$1.strip}'\n" + notice
  end
  maruku_recover notice, src, con
  extension_meta(src, con, break_on_chars)
end
# Reads the meta-data found inside "{...}".
#
# * "name: attributes" defines a named attribute list: it is stored in
#   self.doc.ald under +name+ and an md_ald element is pushed on +con+.
# * Otherwise the attributes form an inline attribute list: an md_ial
#   element is pushed on +con+ and nothing is registered in the ald
#   table (there is no name to register it under).
def extension_meta(src, con, break_on_chars)
  # The whole word is captured: "(\w)+" would keep only its last letter.
  if m = src.read_regexp(/(\w+)\:/)
    name = m[1]
    al = read_attribute_list(src, con, break_on_chars)
    self.doc.ald[name] = al
    con.push md_ald(name, al)
  else
    al = read_attribute_list(src, con, break_on_chars)
    con.push md_ial(al)
  end
end
# Reads an automatic link "<url>" (cursor on '<') and pushes the md_url
# element on +con+.
def read_url_el(src,con)
  src.ignore_char # leading <
  target = read_simple(src, [], [?>])
  src.ignore_char # closing >
  con.push_element md_url(target)
end
# Reads an automatic e-mail link "<address>" or "<mailto:address>"
# (cursor on '<') and pushes the md_email element on +con+; a leading
# "mailto:" is removed from the address.
def read_email_el(src,con)
  src.ignore_char # leading <
  raw = read_simple(src, [], [?>])
  src.ignore_char # closing >
  con.push_element md_email(raw.gsub(/^mailto:/,''))
end
# Reads a URL up to one of the +break_on+ characters.
# Surrounding angle brackets ("<...>") are removed.
# Returns the URL, or nil if it is empty.
# A URL starting with a quote character is reported through +error+.
def read_url(src, break_on)
  error 'Invalid char for url', src if [?',?"].include? src.cur_char
  url = read_simple(src, [], break_on) || ""
  url = url[1, url.size-2] if url[0] == ?< && url[-1] == ?>
  url.size == 0 ? nil : url
end
# Reads a value that may or may not be quoted: if the cursor is on a
# quote character the value is read with read_quoted, otherwise with
# read_simple using the given escapes and terminators.
def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
  if [?', ?"].include?(src.cur_char)
    read_quoted(src, con)
  else
    read_simple(src, escaped, exit_on_chars)
  end
end
# Tries to read a quoted value. If the stream does not start with ' or ",
# returns nil without consuming anything. Otherwise consumes the opening
# quote, the content up to the same quote character (read_simple, with
# the escapes of EscapedCharInQuotes) and the closing quote, and returns
# the content.
def read_quoted(src, con)
  return nil unless [?', ?"].include?(src.cur_char)
  quote_char = src.shift_char # opening quote
  content = read_simple(src, EscapedCharInQuotes, [quote_char])
  src.ignore_char # closing quote
  content
end
# Reads a simple string (no formatting) until one of the terminators is
# found, resolving the backslash escapes listed in +escaped+.
#
# src::             character source to read from.
# escaped::         characters that lose their backslash when escaped.
# exit_on_chars::   characters that end the string (may be nil).
# exit_on_strings:: strings that end the string (may be nil).
#
# The terminator is never consumed. Returns the text read, or nil if it
# is empty. If the input ends before a terminator is found, the problem
# is reported through maruku_error / maruku_recover and the text read so
# far is returned.
def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil)
  text = ""
  while true
    c = src.cur_char
    break if exit_on_chars && exit_on_chars.include?(c)
    break if exit_on_strings &&
             exit_on_strings.any? {|x| src.cur_chars_are x}
    case c
    when nil
      # exit_on_chars may legitimately be nil (see the guard above).
      expected = (exit_on_chars || []).compact.map{|x| "" << x}
      s = "String finished while reading (break on "+
        "#{expected.inspect})"+
        " already read: #{text.inspect}"
      maruku_error s, src
      maruku_recover "I boldly continue", src
      break
    when ?\\
      d = src.next_char
      if escaped.include? d
        # Escaped character: drop the backslash, keep the character.
        src.ignore_chars(2)
        text << d
      else
        text << src.shift_char
      end
    else
      text << src.shift_char
    end
  end
  text.empty? ? nil : text
end
# Reads an emphasis span: skips the one-character opening delimiter,
# reads the content up to +delim+, skips the closing delimiter and
# returns the md_em element.
def read_em(src, delim)
  src.ignore_char
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_char
  md_em(inner)
end
# Reads a strong span: skips the two-character opening delimiter, reads
# the content up to +delim+, skips the closing delimiter and returns the
# md_strong element.
def read_strong(src, delim)
  src.ignore_chars(2)
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(2)
  md_strong(inner)
end
# Reads a span that is both emphasized and strong: skips the
# three-character opening delimiter, reads the content up to +delim+,
# skips the closing delimiter and returns the md_emstrong element.
def read_emstrong(src, delim)
  src.ignore_chars(3)
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(3)
  md_emstrong(inner)
end
# The space character, written as an escaped character literal.
SPACE = ?\ # = 32
# Matches a reference id followed by the closing bracket:
# group 1 is the id (no ']' and no whitespace), group 2 the optional
# whitespace plus ']'. Used by read_ref_id.
# R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
# Reads a bracketed id "[refid]" with the cursor on the opening bracket.
# The opening bracket and any whitespace after it are always consumed;
# the id and the closing bracket are consumed when R_REF_ID matches.
# Returns the id, or nil if the pattern does not match.
def read_ref_id(src, con)
  src.ignore_char # [
  src.consume_whitespace
  m = src.read_regexp(R_REF_ID)
  m ? m[1] : nil
end
# Reads a footnote reference id (see read_ref_id) and pushes the
# corresponding md_foot_ref element on +con+.
def read_footnote_ref(src,con)
  con.push_element md_foot_ref(read_ref_id(src,con))
end
# Reads an inline HTML fragment starting at the cursor.
#
# The remaining buffer is fed to an HTMLHelper; what it consumed is
# pushed on +con+ as an md_html element and skipped in +src+. If the
# helper consumed nothing, a message is printed and the current
# character is pushed as plain text. Any exception (including the one
# raised for malformed HTML) is reported through maruku_error, after
# which the current character is pushed as plain text.
def read_inline_html(src, con)
  helper = HTMLHelper.new
  begin
    # This is our current buffer in the context
    buffer = src.current_remaining_buffer
    helper.eat_this buffer
    unless helper.is_finished?
      error "inline_html: Malformed:\n "+
        "#{buffer.inspect}\n #{helper.inspect}",src,con
    end
    consumed = buffer.size - helper.rest.size
    if consumed <= 0
      puts "HTML helper did not work on #{buffer.inspect}"
      con.push_char src.shift_char
    else
      con.push_element md_html(helper.stuff_you_read)
      src.ignore_chars(consumed)
    end
  rescue Exception => e
    maruku_error "Bad html: \n" +
      add_tabs(e.inspect+e.backtrace.join("\n"),1,'>'),
      src,con
    maruku_recover "I will try to continue after bad HTML.", src, con
    con.push_char src.shift_char
  end
end
# Reads an inline code span delimited by one or more backticks and
# pushes the md_code element on +con+.
#
# The closing delimiter is a run of as many backticks as the opening
# one. When more than one backtick is used, one space right after the
# opening delimiter and one right before the closing delimiter are
# dropped.
#
# read_simple returns nil for an empty span; in that case nil is passed
# on to md_code unchanged.
def read_inline_code(src, con)
  # Count the number of ticks
  num_ticks = 0
  while src.cur_char == ?`
    num_ticks += 1
    src.ignore_char
  end
  # We will read until this string
  end_string = "`"*num_ticks
  code = read_simple(src, [], [], [end_string])
  src.ignore_chars num_ticks
  # The space trimming only applies to a non-empty multi-tick span;
  # +code+ is nil when nothing was read.
  if code && num_ticks > 1
    # Ignore at most one space
    code = code[1, code.size-1] if code[0] == SPACE
    # drop last space
    code = code[0,code.size-1] if code[-1] == SPACE
  end
  con.push_element md_code(code)
end
# Parses a link starting at "[" and pushes the resulting element to `con`.
#
# Three forms are handled after the bracketed text has been read:
#   [text](url "title")  -> md_im_link(children, url, title)
#   [text][ref]          -> md_link(children, ref_id)
#   [text]               -> md_link(children, "")
# When the inline form is not closed by ")" or the reference id cannot be
# read, an error is reported and the already-parsed children are pushed back
# as plain elements instead of a link.
def read_link(src, con)
# we read the string and see what happens
src.ignore_char # opening bracket
children = read_span(src, EscapedCharInText, [?]])
src.ignore_char # closing bracket
# ignore space
# (only a single blank, and only when it is directly followed by "[" or "(")
if src.cur_char == SPACE and
(src.next_char == ?[ or src.next_char == ?( )
src.shift_char
end
case src.cur_char
when ?(
src.ignore_char # opening (
src.consume_whitespace
url = read_url(src, [SPACE,?\t,?)])
if not url
url = '' # no url is ok
end
src.consume_whitespace
title = nil
if src.cur_char != ?) # we have a title
# Remember the opening quote so it can be re-inserted if the title
# itself turns out to contain that quote character.
quote_char = src.cur_char
title = read_quoted(src,con)
if not title
maruku_error 'Must quote title',src,con
else
# Tries to read a title with quotes: ![a](url "ti"tle")
# this is the most ugly thing in Markdown
if not src.next_matches(/\s*\)/)
# if there is not a closing par ), then read
# the rest and guess it's title with quotes
rest = read_simple(src, escaped=[], break_on_chars=[?)],
break_on_strings=[])
# chop the closing char
rest.chop!
title << quote_char << rest
end
end
end
src.consume_whitespace
closing = src.shift_char # closing )
if closing != ?)
maruku_error 'Unclosed link',src,con
maruku_recover "No closing ): I will not create"+
" the link for #{children.inspect}", src, con
con.push_elements children
return
end
con.push_element md_im_link(children,url, title)
when ?[ # link ref
ref_id = read_ref_id(src,con)
if ref_id
con.push_element md_link(children, ref_id)
else
maruku_error "Could not read ref_id", src, con
maruku_recover "I will not create the link for "+
"#{children.inspect}", src, con
con.push_elements children
return
end
else # empty [link]
con.push_element md_link(children, "")
end
end # read link
# Parses an image starting at "![" and pushes the resulting element to `con`.
#
# Three forms are handled after the bracketed alt text has been read:
#   ![alt](url "title")  -> md_im_image(alt_text, url, title)
#   ![alt][ref]          -> md_image(alt_text, ref_id)
#   ![alt]               -> the alt-text elements are pushed back unchanged
#
# The last branch used to reference an undefined local (`children`), which
# raised a NameError for any "![alt]" not followed by "(" or "["; it now
# pushes the elements that were actually read (`alt_text`).
def read_image(src, con)
  src.ignore_chars(2) # opening "!["
  alt_text = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket
  # ignore a single blank, but only when "[" or "(" follows it
  if src.cur_char == SPACE and
    (src.next_char == ?[ or src.next_char == ?( )
    src.ignore_char
  end
  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE,?\t,?)])
    if not url
      error "Could not read url from #{src.cur_chars(10).inspect}",
        src,con
    end
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      # Remember the opening quote so it can be re-inserted if the title
      # itself contains that quote character.
      quote_char = src.cur_char
      title = read_quoted(src,con)
      if not title
        maruku_error 'Must quote title',src,con
      else
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # this is the most ugly thing in Markdown
        if not src.next_matches(/\s*\)/)
          # if there is not a closing par ), then read
          # the rest and guess it's title with quotes
          rest = read_simple(src, escaped=[], break_on_chars=[?)],
            break_on_strings=[])
          # chop the closing char
          rest.chop!
          title << quote_char << rest
        end
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      # NOTE(review): `<<` assumes `closing` is not nil here - confirm what
      # shift_char returns at end of input.
      error ("Unclosed link: '"<<closing<<"'")+
        " Read url=#{url.inspect} title=#{title.inspect}",src,con
    end
    con.push_element md_im_image(alt_text, url, title)
  when ?[ # image ref
    ref_id = read_ref_id(src,con)
    con.push_element md_image(alt_text, ref_id)
  else # no url and no reference: keep what was read as plain elements
    con.push_elements alt_text
  end
end # read image
# Accumulates the output of span-level parsing: a list of finished elements
# plus a buffer (`cur_string`) holding the text currently being read.
# The buffer is flushed into `elements` whenever a non-text element is pushed.
class SpanContext
  include MaRuKu::Strings

  # Read elements
  attr_accessor :elements
  # Text collected since the last flush
  attr_accessor :cur_string

  def initialize
    @elements = []
    @cur_string = ""
  end

  # Appends one element (a String or an MDElement), flushing any buffered
  # text first. Raises a RuntimeError for any other class. Returns nil.
  def push_element(e)
    unless e.kind_of?(String) or e.kind_of?(MDElement)
      raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} "
    end
    push_string_if_present
    @elements << e
    nil
  end
  alias push push_element

  # Appends each item of `a`: strings go into the text buffer, anything else
  # is pushed as an element.
  #
  # Strings are appended whole. The previous byte-by-byte append gave the
  # same result on Ruby 1.8 but turned every byte above 127 into a separate
  # codepoint on Ruby >= 1.9, corrupting multibyte characters.
  def push_elements(a)
    a.each do |e|
      if e.kind_of? String
        @cur_string << e
      else
        push_element e
      end
    end
  end

  # Moves the buffered text, if any, into `elements`. Returns nil.
  def push_string_if_present
    if @cur_string.size > 0
      @elements << @cur_string
      @cur_string = ""
    end
    nil
  end

  # Appends a single character to the text buffer. Returns nil.
  def push_char(c)
    @cur_string << c
    nil
  end

  # push space into current string if
  # there isn't one
  #
  # `?\s` is used instead of a backslash-blank literal: the latter depends on
  # an invisible trailing blank and silently changes meaning when an editor
  # strips trailing whitespace.
  def push_space
    last = @cur_string[@cur_string.size-1]
    @cur_string << ?\s if last != ?\s
  end

  # Human-readable dump of the elements read so far and of the text buffer.
  def describe
    lines = @elements.map{|x| x.inspect}.join("\n")
    s = "Elements read in span: \n" +
      add_tabs(lines,1, ' -')+"\n"
    if @cur_string.size > 0
      s += "Current string: \n #{@cur_string.inspect}\n"
    end
    s
  end
end # SpanContext
end end end end # module MaRuKu; module In; module Markdown; module SpanLevelParser

View File

@ -0,0 +1,225 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
#
# NOTA BENE:
#
# The following algorithm is a rip-off of RubyPants written by
# Christian Neukirchen.
#
# RubyPants is a Ruby port of SmartyPants written by John Gruber.
#
# This file is distributed under the GPL, which I guess is compatible
# with the terms of the RubyPants license.
#
# -- Andrea Censi
# = RubyPants -- SmartyPants ported to Ruby
#
# Ported by Christian Neukirchen <mailto:chneukirchen@gmail.com>
# Copyright (C) 2004 Christian Neukirchen
#
# Incorporates ideas, comments and documentation by Chad Miller
# Copyright (C) 2004 Chad Miller
#
# Original SmartyPants by John Gruber
# Copyright (C) 2003 John Gruber
#
#
# = RubyPants -- SmartyPants ported to Ruby
#
#
# [snip]
#
# == Authors
#
# John Gruber did all of the hard work of writing this software in
# Perl for Movable Type and almost all of this useful documentation.
# Chad Miller ported it to Python to use with Pyblosxom.
#
# Christian Neukirchen provided the Ruby port, as a general-purpose
# library that follows the *Cloth API.
#
#
# == Copyright and License
#
# === SmartyPants license:
#
# Copyright (c) 2003 John Gruber
# (http://daringfireball.net)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# * Neither the name "SmartyPants" nor the names of its contributors
# may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# This software is provided by the copyright holders and contributors
# "as is" and any express or implied warranties, including, but not
# limited to, the implied warranties of merchantability and fitness
# for a particular purpose are disclaimed. In no event shall the
# copyright owner or contributors be liable for any direct, indirect,
# incidental, special, exemplary, or consequential damages (including,
# but not limited to, procurement of substitute goods or services;
# loss of use, data, or profits; or business interruption) however
# caused and on any theory of liability, whether in contract, strict
# liability, or tort (including negligence or otherwise) arising in
# any way out of the use of this software, even if advised of the
# possibility of such damage.
#
# === RubyPants license
#
# RubyPants is a derivative work of SmartyPants and smartypants.py.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# This software is provided by the copyright holders and contributors
# "as is" and any express or implied warranties, including, but not
# limited to, the implied warranties of merchantability and fitness
# for a particular purpose are disclaimed. In no event shall the
# copyright owner or contributors be liable for any direct, indirect,
# incidental, special, exemplary, or consequential damages (including,
# but not limited to, procurement of substitute goods or services;
# loss of use, data, or profits; or business interruption) however
# caused and on any theory of liability, whether in contract, strict
# liability, or tort (including negligence or otherwise) arising in
# any way out of the use of this software, even if advised of the
# possibility of such damage.
#
#
# == Links
#
# John Gruber:: http://daringfireball.net
# SmartyPants:: http://daringfireball.net/projects/smartypants
#
# Chad Miller:: http://web.chad.org
#
# Christian Neukirchen:: http://kronavita.de/chris
module MaRuKu; module In; module Markdown; module SpanLevelParser
# Regexp character class (as source text) listing ASCII punctuation.
Punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'
# Regexp character class (as source text) for a character after which a
# quote is treated as a closing quote: anything except whitespace, "[", "{",
# "(" and "-".
Close_class = %![^\ \t\r\n\\[\{\(\-]!
# Ordered substitution table used by `educate`. Each entry is
# [pattern, replacement]; the trailing `map` normalizes every entry so that
# the pattern is a Regexp (plain strings are escaped) and the replacement is
# an Array. Replacement symbols are entity names, except :one, which stands
# for the text captured by the pattern's first group.
Rules = [
[/---/, :mdash ],
[/--/, :ndash ],
['...', :hellip ],
['. . .', :hellip ],
["``", :ldquo ],
["''", :rdquo ],
[/<<\s/, [:laquo, :nbsp] ],
[/\s>>/, [:nbsp, :raquo] ],
[/<</, :laquo ],
[/>>/, :raquo ],
# def educate_single_backticks(str)
# ["`", :lsquo]
# ["'", :rsquo]
# Special case if the very first character is a quote followed by
# punctuation at a non-word-break. Close the quotes by brute
# force:
[/^'(?=#{Punct_class}\B)/, :rsquo],
[/^"(?=#{Punct_class}\B)/, :rdquo],
# Special case for double sets of quotes, e.g.:
# <p>He said, "'Quoted' words in a larger quote."</p>
[/"'(?=\w)/, [:ldquo, :lsquo] ],
[/'"(?=\w)/, [:lsquo, :ldquo] ],
# Special case for decade abbreviations (the '80s):
[/'(?=\d\ds)/, :rsquo ],
# Get most opening single quotes:
[/(\s)'(?=\w)/, [:one, :lsquo] ],
# Single closing quotes:
[/(#{Close_class})'/, [:one, :rsquo]],
[/'(\s|s\b|$)/, [:rsquo, :one]],
# Any remaining single quotes should be opening ones:
[/'/, :lsquo],
# Get most opening double quotes:
[/(\s)"(?=\w)/, [:one, :ldquo]],
# Double closing quotes:
[/(#{Close_class})"/, [:one, :rdquo]],
[/"(\s|s\b|$)/, [:rdquo, :one]],
# Any remaining quotes should be opening ones:
[/"/, :ldquo]
].
map{|reg, subst| # People should do the thinking, machines should do the work.
reg = Regexp.new(Regexp.escape(reg)) if not reg.kind_of? Regexp
subst = [subst] if not subst.kind_of?Array
[reg, subst]}
# Applies one substitution rule to a list of span elements.
#
# reg   - Regexp to look for inside String elements.
# subst - Array of replacement symbols; :one stands for the first capture
#         group, any other symbol becomes md_entity(symbol.to_s).
# input - Array of elements; it is consumed (emptied) by this method.
#
# Non-String elements pass through untouched. For a matching String the text
# before the match is emitted, and the replacement followed by the text after
# the match is put back at the front of the queue so that it is scanned again.
# Returns a new Array with the result.
def apply_one_rule(reg, subst, input)
  result = []
  while (item = input.shift)
    match = item.kind_of?(String) ? reg.match(item) : nil
    unless match
      result << item
      next
    end
    before = match.pre_match
    after = match.post_match
    result << before if before.size > 0
    input.unshift(after) if after.size > 0
    # Unshift in reverse so the replacements end up in their listed order.
    subst.reverse.each do |sym|
      input.unshift(sym == :one ? match[1] : md_entity(sym.to_s))
    end
  end
  result
end
# Runs every entry of Rules, in order, over the given span elements, then
# drops empty strings and merges adjacent strings into one.
# Returns the resulting Array of elements.
def educate(elements)
  educated = Rules.inject(elements) do |current, (reg, subst)|
    apply_one_rule(reg, subst, current)
  end
  # strips empty strings
  educated.delete_if { |item| item.kind_of?(String) && item.size == 0 }
  # join consecutive strings
  educated.inject([]) do |merged, item|
    if item.kind_of?(String) && merged.last.kind_of?(String)
      merged.last << item
    else
      merged << item
    end
    merged
  end
end
end end end end

View File

@ -0,0 +1,141 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
class String
  include MaRuKu::Strings

  # Block-level type of this line as computed by line_md_type.
  # The result is cached in @md_type after the first call.
  def md_type
    return @md_type if @md_type
    @md_type = line_md_type(self)
  end
end
class NilClass
  # Lets nil answer md_type the way a String does; the type is always nil.
  def md_type
    nil
  end
end
# This code does the classification of lines for block-level parsing.
module MaRuKu; module Strings
# Classifies one source line for block-level parsing.
#
# l - the line (String).
#
# Returns a Symbol naming the line type (:text, :code, :empty, :raw_html,
# :ulist, ...). The checks run top to bottom and the first hit wins, so their
# order matters; :text is the fallback.
def line_md_type(l)
  # The order of evaluation is important (:text is a catch-all)
  return :text if l =~ /^[a-zA-Z]/
  return :code if number_of_leading_spaces(l)>=4
  return :empty if l =~ /^\s*$/
  return :footnote_text if l =~ FootnoteText
  return :ref_definition if l =~ LinkRegex or l=~ IncompleteLink
  return :abbreviation if l =~ Abbreviation
  return :definition if l =~ Definition
  # I had a bug with emails and urls at the beginning of the
  # line that were mistaken for raw_html
  return :text if l=~EMailAddress or l=~ URL
  # raw html is like PHP Markdown Extra: at most three spaces before
  return :xml_instr if l =~ %r{^\s*<\?}
  return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
  # Anchored at the start of the line: without the "^" any line that merely
  # contained "<!--" somewhere was classified as raw HTML.
  return :raw_html if l =~ %r{^[ ]{0,3}<\!\-\-}
  return :ulist if l =~ /^\s?([\*\-\+])\s+.*\w+/
  return :olist if l =~ /^\s?\d+\..*\w+/
  return :header1 if l =~ /^(=)+/
  return :header2 if l =~ /^([-\s])+$/
  return :header3 if l =~ /^(#)+\s*\S+/
  # at least three asterisks on a line, and only whitespace
  return :hrule if l =~ /^(\s*\*\s*){3,1000}$/
  return :hrule if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens
  return :hrule if l =~ /^(\s*_\s*){3,1000}$/ # or underscores
  return :quote if l =~ /^>/
  return :metadata if l =~ /^@/
  # if @@new_meta_data?
  return :ald if l =~ AttributeDefinitionList
  return :ial if l =~ InlineAttributeList
  # end
  # return :equation_end if l =~ EquationEnd
  return :text # else, it's just text
end
# Attribute definition list line, e.g. "{id}: attributes".
# $1 = id $2 = attribute list
AttributeDefinitionList = /^\s{0,3}\{([\w\d\s]+)\}:\s*(.*)\s*$/
# A line consisting only of a braced attribute list; $1 = the list.
InlineAttributeList = /^\s{0,3}\{(.*)\}\s*$/
# Definition-list data line.
# Example:
# ^:blah blah
# ^: blah blah
# ^ : blah blah
Definition = %r{
^ # begin of line
[ ]{0,3} # up to 3 spaces
: # colon
\s* # whitespace
(\S.*) # the text = $1
$ # end of line
}x
# Abbreviation definition.
# Example:
# *[HTML]: Hyper Text Markup Language
Abbreviation = %r{
^ # begin of line
\* # one asterisk
\[ # opening bracket
([^\]]+) # any non-closing bracket: id = $1
\] # closing bracket
: # colon
\s* # whitespace
(\S.*\S)* # definition=$2
\s* # strip this whitespace
$ # end of line
}x
# Footnote text line: "[^id]: optional text".
FootnoteText = %r{
^\s*\[(\^.+)\]: # id = $1 (including '^')
\s*(\S.*)?$ # text = $2 (not obb.)
}x
# This regex is taken from BlueCloth sources
# Link defs are in the form: ^[id]: \n? url "optional title"
LinkRegex = %r{
^[ ]*\[([^\]]+)\]: # id = $1
[ ]*
<?(\S+)>? # url = $2
[ ]*
(?:# Titles are delimited by "quotes" or (parens).
["(']
(.+?) # title = $3
[")'] # Matching ) or "
\s*(.+)? # stuff = $4
)? # title is optional
}x
# A link definition with an id but nothing after the colon.
IncompleteLink = %r{^\s*\[(.+)\]:\s*$}
# Header ending in "{#id}": $1 = header text, $2 = id.
HeaderWithId = /^(.*)\{\#([\w_-]+)\}\s*$/
# Header ending in a braced attribute list: $1 = header text, $2 = attributes.
HeaderWithAttributes = /^(.*)\{(.*)\}\s*$/
# if contains a pipe, it could be a table header
MightBeTableHeader = %r{\|}
# One table separator cell; the optional colons are captured as $1 and $2.
# -------------:
Sep = /\s*(\:)?\s*-+\s*(\:)?\s*/
# A full table separator row.
# | -------------:| ------------------------------ |
TableSeparator = %r{^(\|?#{Sep}\|?)+\s*$}
# An e-mail address in angle brackets; $1 = the address.
EMailAddress = /<([^:]+@[^:]+)>/
# A line starting with an angle-bracketed http URL.
URL = /^<http:/
end end

33
lib/maruku/maruku.rb Normal file
View File

@ -0,0 +1,33 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
# The Maruku class is the public interface
class Maruku
  # s    - optional source text; when given it is parsed with parse_doc.
  # meta - Hash merged into the document attributes before parsing.
  def initialize(s=nil, meta={})
    super(nil)
    self.attributes.merge! meta
    parse_doc(s) if s
  end
end

View File

@ -0,0 +1,689 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'rexml/document'
require 'rubygems'
require 'syntax'
require 'syntax/convertors/html'
class String
  # Renders the string for HTML output by wrapping it in a REXML::Text node;
  # encoding and escaping are left to REXML.
  def to_html
    REXML::Text.new self
  end
end
# Reopens REXML::Element to change the visibility of write_children, which
# the HTML exporter calls directly on an element.
class REXML::Element
# We only want to output the children in Maruku::to_html
public :write_children
end
# This module groups all functions related to HTML export.
module MaRuKu; module Out; module HTML
include REXML
# Render as an HTML fragment (no head, just the content of BODY). (returns a string)
#
# context - Hash of options:
#           :indent  - indentation passed to REXML (default -1).
#           :ie_hack - passed to REXML's writer (default true). An explicit
#                      `false` is now honoured; the previous
#                      `context[:ie_hack] || true` could never be false.
def to_html(context={})
  indent = context[:indent] || -1
  ie_hack = context[:ie_hack].nil? ? true : context[:ie_hack]
  # Temporary container: only its children are written out.
  div = Element.new 'dummy'
  children_to_html.each do |e|
    div << e
  end
  # render footnotes
  if @doc.footnotes_order.size > 0
    div << render_footnotes
  end
  doc = Document.new(nil,{:respect_whitespace =>:all})
  doc << div
  # REXML Bug? if indent!=-1 whitespace is not respected for 'pre' elements
  # containing code.
  xml = ""
  div.write_children(xml, indent, true, ie_hack) # third argument: transitive
  xml
end
# Render to a complete HTML document (returns a string)
#
# context - Hash of options:
#           :indent  - indentation passed to REXML (default -1).
#           :ie_hack - passed to REXML's writer (default true). An explicit
#                      `false` is now honoured; the previous
#                      `context[:ie_hack] || true` could never be false.
#
# The serialized tree is prefixed with an XML declaration and the
# "XHTML 1.1 plus MathML 2.0 plus SVG 1.1" doctype. Two alternative doctype
# strings that were assigned but never used have been removed.
def to_html_document(context={})
  indent = context[:indent] || -1
  ie_hack = context[:ie_hack].nil? ? true : context[:ie_hack]
  doc = to_html_document_tree
  xml = ""
  # REXML Bug? if indent!=-1 whitespace is not respected for 'pre' elements
  # containing code.
  doc.write(xml, indent, true, ie_hack) # third argument: transitive
  xhtml11_mathml2_svg11 =
'<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC
"-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
"http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
'
  xhtml11_mathml2_svg11 + xml
end
# Builds a text node holding a single newline.
def xml_newline
  Text.new("\n")
end
# Render to a complete HTML document (returns a REXML document tree)
#
# Builds <html> with the XHTML default namespace and an "svg" namespace, a
# <head> holding a content-type <meta>, the <title> and one stylesheet <link>
# per entry of the :css attribute, and a <body> with the rendered children,
# the footnotes (if any were used) and, when the :maruku_signature setting is
# on, the signature block.
def to_html_document_tree
doc = Document.new(nil,{:respect_whitespace =>:all})
# doc << XMLDecl.new
root = Element.new('html', doc)
root.add_namespace('http://www.w3.org/1999/xhtml')
root.add_namespace('svg', "http://www.w3.org/2000/svg" )
# Document language, "en" unless the :lang attribute says otherwise.
lang = self.attributes[:lang] || 'en'
root.attributes['xml:lang'] = lang
root << xml_newline
head = Element.new 'head', root
#<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
me = Element.new 'meta', head
me.attributes['http-equiv'] = 'Content-type'
# me.attributes['content'] = 'text/html;charset=utf-8'
me.attributes['content'] = 'application/xhtml+xml;charset=utf-8'
# Create title element
# (taken from :title, then :subject, else empty)
doc_title = self.attributes[:title] || self.attributes[:subject] || ""
title = Element.new 'title', head
title << Text.new(doc_title)
# :css is a whitespace-separated list of stylesheet URLs.
if css_list = self.attributes[:css]
css_list.split.each do |css|
# <link type="text/css" rel="stylesheet" href="..." />
link = Element.new 'link'
link.attributes['type'] = 'text/css'
link.attributes['rel'] = 'stylesheet'
link.attributes['href'] = css
head << link
head << xml_newline
end
end
root << xml_newline
body = Element.new 'body'
children_to_html.each do |e|
body << e
end
# render footnotes
if @doc.footnotes_order.size > 0
body << render_footnotes
end
# When we are rendering a whole document, we add a signature
# at the bottom.
if get_setting(:maruku_signature)
body << maruku_html_signature
end
root << body
doc
end
# Returns the English ordinal suffix ("st", "nd", "rd" or "th") for a day of
# the month.
def day_suffix(day)
  suffixes = Hash.new('th')
  [1, 21, 31].each { |d| suffixes[d] = 'st' }
  [2, 22].each { |d| suffixes[d] = 'nd' }
  [3, 23].each { |d| suffixes[d] = 'rd' }
  suffixes[day]
end
# Formats the current time as e.g. " at 08:36 on Monday, January 22nd, 2007".
def nice_date
  now = Time.now
  now.strftime(" at %H:%M on %A, %B %d") + day_suffix(now.day) + now.strftime(", %Y")
end
# Builds the signature block appended to complete documents: a
# <div class="maruku_signature"> containing a rule and a small italic
# "Created by Maruku at <time> on <date>." line that links to the Maruku site.
# Returns the <div> element.
def maruku_html_signature
div = Element.new 'div'
div.attributes['class'] = 'maruku_signature'
Element.new 'hr', div
span = Element.new 'span', div
span.attributes['style'] = 'font-size: small; font-style: italic'
span << Text.new('Created by ')
a = Element.new('a', span)
a.attributes['href'] = 'http://maruku.rubyforge.org'
a.attributes['title'] = 'Maruku: a Markdown-superset interpreter for Ruby'
a << Text.new('Maruku')
span << Text.new(nice_date+".")
div
end
# Builds the footnotes block: a <div class="footnotes"> with a rule and an
# ordered list holding one <li id="fn:N"> per footnote, in the order the
# footnotes were referenced. Each item ends with a back-link to "#fnref:N".
# A referenced id with no footnote text is reported through maruku_error and
# left out of the list. Returns the <div> element.
def render_footnotes
div = Element.new 'div'
div.attributes['class'] = 'footnotes'
div << Element.new('hr')
ol = Element.new 'ol'
# Footnote numbers are 1-based positions in the reference order.
@doc.footnotes_order.each_with_index do |fid, i| num = i+1
f = self.footnotes[fid]
if f
li = f.wrap_as_element('li')
li.attributes['id'] = "fn:#{num}"
a = Element.new 'a'
a.attributes['href'] = "#fnref:#{num}"
a.attributes['rev'] = 'footnote'
# Raw text node: the character reference is emitted as written.
a<< Text.new('&#8617;', true, nil, true)
li.insert_after(li.children.last, a)
ol << li
else
maruku_error"Could not find footnote '#{fid}'"
end
end
div << ol
div
end
# Renders a horizontal rule as an <hr> element carrying the node's
# standard attributes.
def to_html_hrule
  create_html_element 'hr'
end

# Renders a line break as a bare <br> element.
def to_html_linebreak
  Element.new 'br'
end
# Renders the children as HTML and wraps them in a new element.
#
# name               - tag name of the wrapper.
# attributes_to_copy - extra attribute names to copy from the node, on top of
#                      the standard ones handled by create_html_element.
#
# Returns the wrapper element.
def wrap_as_element(name, attributes_to_copy=[])
  wrapper = create_html_element(name, attributes_to_copy)
  children_to_html.inject(wrapper) do |parent, child|
    parent << child
    parent
  end
end
# Attributes copied onto every generated element when the node defines them.
StandardAttributes = [:id, :style, :class]

# Creates an element named `name` and copies onto it the node attributes
# listed in StandardAttributes plus `attributes_to_copy`. Only attributes with
# a truthy value are copied; values are converted with to_s.
def create_html_element(name, attributes_to_copy=[])
  element = Element.new name
  (StandardAttributes + attributes_to_copy).each do |key|
    value = @attributes[key]
    element.attributes[key.to_s] = value.to_s if value
  end
  element
end
# Renders an unordered list. A list carrying the :toc attribute is replaced
# by the document's table of contents; any other list becomes a <ul>
# surrounded by newlines.
def to_html_ul
  return @doc.toc.to_html if @attributes[:toc]
  add_ws wrap_as_element('ul')
end
# Block-level wrappers: the children are wrapped in the named element and the
# result is surrounded by newline text nodes.
def to_html_paragraph
  add_ws wrap_as_element('p')
end

def to_html_ol
  add_ws wrap_as_element('ol')
end

def to_html_li
  add_ws wrap_as_element('li')
end

def to_html_li_span
  add_ws wrap_as_element('li')
end

def to_html_quote
  add_ws wrap_as_element('blockquote')
end

# Inline wrappers: no surrounding newlines.
def to_html_strong
  wrap_as_element('strong')
end

def to_html_emphasis
  wrap_as_element('em')
end
# Section number of this node as text, e.g. "1.2. ".
# Returns nil when the document does not use numbered headers or when the
# node has no (or an empty) :section_number attribute.
def section_number
  return nil unless @doc.attributes[:use_numbered_headers]
  numbers = @attributes[:section_number]
  (numbers && !numbers.empty?) ? numbers.join('.') + ". " : nil
end
# Section number of this node as a <span class="maruku_section_number">.
# Returns nil when section_number is nil.
#
# The number is computed once and reused; it used to be assigned to an
# otherwise unused local and then computed a second time.
def render_section_number
  num = section_number
  return nil unless num
  span = Element.new 'span'
  span.attributes['class'] = 'maruku_section_number'
  span << Text.new(num)
  span
end
# Renders a header as <hN>, N being the node's level. When a section number
# applies, its <span> is inserted in front of the header's first child. The
# result is surrounded by newlines.
def to_html_header
  header = wrap_as_element "h#{self.level}"
  number_span = render_section_number
  header.insert_before(header.children.first, number_span) if number_span
  add_ws header
end
# Turns raw source text into a text node: ampersands are replaced by "&amp;",
# the result is passed through Text.normalize and wrapped in a
# whitespace-respecting, non-raw Text.
def source2html(source)
  escaped = Text.normalize(source.gsub(/&/,'&amp;'))
  Text.new(escaped, true, nil, false)
end
=begin maruku_doc
Attribute: html_use_syntax
Scope: document
Output: html
Summary: Enables the use of the `syntax` package.
Related: lang, code_lang
Default: <?mrk Globals[:html_use_syntax].to_s ?>
If true, Maruku uses the `syntax` library to highlight code blocks
for which a language is known (see `lang` and `code_lang`).
=end
# Renders a code block.
#
# The language comes from the node's :lang attribute or the document's
# :code_lang attribute ("html" is treated as "xml"). When the
# :html_use_syntax setting is on and a language is known, the `syntax`
# library produces the highlighted <pre>; any failure there is reported and
# the plain <pre><code> rendering is used instead. A background colour that
# differs from the global default is written as an inline style.
def to_html_code;
source = self.raw_code
lang = self.attributes[:lang] || @doc.attributes[:code_lang]
lang = 'xml' if lang=='html'
use_syntax = get_setting :html_use_syntax
element =
if use_syntax && lang
begin
convertor = Syntax::Convertors::HTML.for_syntax lang
# eliminate trailing newlines otherwise Syntax crashes
source = source.gsub(/\n*\Z/,'')
html = convertor.convert( source )
# Re-parse the generated markup and take its root element.
pre = Document.new(html, {:respect_whitespace =>:all}).root
pre.attributes['class'] = lang
pre
# NOTE(review): rescuing Object catches every raised exception; this looks
# like a deliberate best-effort fallback.
rescue Object => e
maruku_error"Error while using the syntax library for code:\n#{source.inspect}"+
"Lang is #{lang} object is: \n"+
self.inspect +
"\nException: #{e.class}: #{e.message}\n\t#{e.backtrace.join("\n\t")}"
tell_user("Using normal PRE because the syntax library did not work.")
to_html_code_using_pre(source)
end
else
to_html_code_using_pre(source)
end
color = get_setting(:code_background_color)
if color != Globals[:code_background_color]
element.attributes['style'] = "background-color: #{color};"
end
element
end
# Renders `source` as <pre><code>...</code></pre> without highlighting.
#
# The text is escaped by hand, in this order: "&" to "&amp;", then
# Text.normalize, then apostrophes to "&#39;". With the :code_show_spaces
# setting, tabs and blanks are replaced by visible marker characters. The
# escaped string is inserted as a raw text node so it is not escaped again.
# Returns the <pre> element.
def to_html_code_using_pre(source)
pre = create_html_element 'pre'
code = Element.new 'code', pre
s = source
s = s.gsub(/&/,'&amp;')
s = Text.normalize(s)
s = s.gsub(/\&apos;/,'&#39;') # IE bug
s = s.gsub(/'/,'&#39;') # IE bug
if get_setting(:code_show_spaces)
# 187 = raquo
# 160 = nbsp
# 172 = not
s.gsub!(/\t/,'&#187;'+'&#160;'*3)
s.gsub!(/ /,'&#172;')
end
text = Text.new(s, respect_ws=true, parent=nil, raw=true )
code << text
pre
end
# Renders an inline code span as a <code> element holding the escaped source.
# A background colour that differs from the global default is written as an
# inline style.
def to_html_inline_code
  code = create_html_element 'code'
  code << source2html(self.raw_code)
  background = get_setting(:code_background_color)
  if background != Globals[:code_background_color]
    code.attributes['style'] = "background-color: #{background};"
  end
  code
end
# Renders an immediate link: an <a> whose href is the node's url and whose
# text is the same url with a leading "mailto:" removed.
def to_html_immediate_link
  link = create_html_element 'a'
  target = self.url
  link << Text.new(target.gsub(/^mailto:/,'')) # don't show mailto
  link.attributes['href'] = target
  link
end
# Renders a reference-style link.
#
# The reference id is the node's ref_id, or the lower-cased text of the
# children when the id is empty. When the document knows the reference, an
# <a> is returned with href/title taken from it; otherwise the problem is
# reported and the children are wrapped in a <span> instead.
#
# The reference is looked up before any element is built. The <a> used to be
# built first and thrown away on a missing reference, so the children were
# rendered twice in that case - and rendering a footnote reference registers
# it in the document's footnote order each time.
def to_html_link
  id = self.ref_id
  # if empty, use text
  if id.size == 0
    id = children.to_s.downcase
  end
  ref = @doc.refs[id]
  unless ref
    maruku_error"Could not find ref_id = #{id.inspect} for #{self.inspect}"
    tell_user "Not creating a link for ref_id = #{id.inspect}."
    return wrap_as_element('span')
  end
  a = wrap_as_element 'a'
  url = ref[:url]
  title = ref[:title]
  a.attributes['href'] = url if url
  a.attributes['title'] = title if title
  a
end
# Renders an inline link ("[text](url "title")") as an <a> with href and,
# when present, title. Without a url the problem is reported and the children
# are wrapped in a <span> instead.
#
# The fallback message used to interpolate `id`, which is not defined in this
# method (an inline link has no reference id), so the error path itself
# failed on Ruby >= 1.9.
def to_html_im_link
  url = self.url
  unless url
    maruku_error"Could not find url in #{self.inspect}"
    tell_user "Not creating a link because no url was given."
    return wrap_as_element('span')
  end
  title = self.title
  a = wrap_as_element 'a'
  a.attributes['href'] = url
  a.attributes['title'] = title if title
  a
end
# Surrounds `e` with newline text nodes; returns the three-item Array.
def add_ws(e)
  padding = ["\n", "\n"].map { |s| Text.new(s) }
  [padding.first, e, padding.last]
end
##### Email address
# Encodes every byte of `s` as a zero-padded decimal character reference,
# e.g. "a" becomes "&#097;". Returns the encoded String.
def obfuscate(s)
  s.unpack('C*').map { |byte| "&#%03d;" % byte }.join
end
# Renders an e-mail address as a mailto link. The visible text is the
# obfuscated address (inserted as raw text); the href holds the address in
# clear.
def to_html_email_address
  address = self.email
  link = create_html_element 'a'
  # Obfuscating the href as well was tried and, per the original note,
  # does not work for the moment.
  link.attributes['href'] = "mailto:#{address}"
  link << Text.new(obfuscate(address), false, nil, true)
  link
end
##### Images
# Renders a reference-style image as an <img>.
#
# src comes from the reference's :url and alt from its :title; the
# reference's :title, :class and :style are copied when present. When the
# document does not know the reference, the problem is reported and the
# children are wrapped in a <span> instead.
#
# The fallback message used to be built with a stray unary "+" in front of
# its second half; String has no unary plus before Ruby 2.3, so the error
# path raised NoMethodError there. The message text is unchanged.
def to_html_image
  a = create_html_element 'img'
  id = self.ref_id
  ref = @doc.refs[id]
  unless ref
    maruku_error"Could not find id = #{id.inspect} for\n #{self.inspect}"
    tell_user "Could not create image with ref_id = #{id.inspect};" +
      " Using SPAN element as replacement."
    return wrap_as_element('span')
  end
  url = ref[:url]
  title = ref[:title]
  a.attributes['src'] = url.to_s
  a.attributes['alt'] = title.to_s
  [:title, :class, :style].each do |s|
    a.attributes[s.to_s] = ref[s] if ref[s]
  end
  a
end
# Renders an inline image ("![alt](url "title")") as an <img> with src set to
# the url and alt set to the title. Without a url the problem is reported and
# the children are wrapped in a <span> instead.
#
# The fallback message used to interpolate `id`, which is not defined in this
# method (an inline image has no reference id), and carried a stray unary "+"
# on its second half; both made the error path itself fail.
def to_html_im_image
  url = self.url
  unless url
    maruku_error"Image with no url: #{self.inspect}"
    tell_user "Could not create image because no url was given;" +
      " Using SPAN element as replacement."
    return wrap_as_element('span')
  end
  title = self.title
  a = create_html_element 'img'
  a.attributes['src'] = url
  a.attributes['alt'] = title.to_s
  a
end
# Renders a raw HTML block.
#
# When the block was parsed successfully (@parsed_html is set), the children
# of the parsed document's root are returned as an Array. When the parsed
# document has no root, the problem is reported and an empty <div> is
# returned. When the block could not be parsed at all, the offending source
# is returned inside a highlighted <pre class="markdown-html-error">.
def to_html_raw_html
raw_html = self.raw_html
if rexml_doc = @parsed_html
root = rexml_doc.root
if root.nil?
s = "Bug in REXML: root() of Document is nil: \n#{rexml_doc.inspect}\n"+
"Raw HTML:\n#{raw_html.inspect}"
maruku_error s
tell_user 'The REXML version you have has a bug, omitting HTML'
div = Element.new 'div'
#div << Text.new(s)
return div
end
# copies the @children array (FIXME is it deep?)
elements = root.to_a
return elements
else # invalid
# Creates red box with offending HTML
tell_user "Wrapping bad html in a PRE with class 'markdown-html-error'\n"+
add_tabs(raw_html,1,'|')
pre = Element.new('pre')
pre.attributes['style'] = 'border: solid 3px red; background-color: pink'
pre.attributes['class'] = 'markdown-html-error'
pre << Text.new("HTML parse error: \n#{raw_html}", true)
return pre
end
end
# Renders an abbreviation as <abbr>: the text is the first child and the
# title attribute is set when the node has a title.
def to_html_abbr
  abbr = Element.new 'abbr'
  abbr << Text.new(children[0])
  expansion = self.title
  abbr.attributes['title'] = expansion if expansion
  abbr
end
# Renders a footnote reference as <sup id="fnref:N"><a href="#fn:N"
# rel="footnote">N</a></sup>.
#
# As a side effect the footnote id is appended to the document's
# footnotes_order list; N is the size of that list after the append.
def to_html_footnote_reference
  # save the order of used footnotes and take the next number
  used = @doc.footnotes_order
  used << self.footnote_id
  num = used.size
  link = Element.new 'a'
  link << Text.new(num.to_s)
  link.attributes['href'] = "\#fn:#{num}"
  link.attributes['rel'] = 'footnote'
  sup = Element.new 'sup'
  sup.attributes['id'] = "fnref:#{num}"
  sup << link
  sup
end
## Definition lists ###
# Definition lists: the list, its terms and its data are wrapped in <dl>,
# <dt> and <dd> with surrounding newlines; a definition entry itself adds no
# element and simply yields its rendered children.
def to_html_definition_list
  add_ws wrap_as_element('dl')
end

def to_html_definition
  children_to_html
end

def to_html_definition_term
  add_ws wrap_as_element('dt')
end

def to_html_definition_data
  add_ws wrap_as_element('dd')
end
# FIXME: Ugly code
# Renders a table.
#
# The children are a flat list of cells; the number of columns is the size of
# the `align` list. The first `num_columns` cells form the single header row
# (<thead>), the remaining cells are cut into body rows of `num_columns`
# cells each (<tbody>). Body cells get an inline text-align style taken from
# `align`; header cells do not.
def to_html_table
align = self.align
num_columns = align.size
head = @children.slice(0, num_columns)
rows = []
# Slice the remaining cells into rows.
i = num_columns
while i<@children.size
rows << @children.slice(i, num_columns)
i += num_columns
end
table = create_html_element 'table'
thead = Element.new 'thead'
tr = Element.new 'tr'
array_to_html(head).each do |x| tr<<x end
thead << tr
table << thead
tbody = Element.new 'tbody'
rows.each do |row|
tr = Element.new 'tr'
array_to_html(row).each_with_index do |x,i|
x.attributes['style'] ="text-align: #{align[i].to_s};"
tr<<x
end
tbody << tr << Text.new("\n")
end
table << tbody
table
end
# Renders a header cell as <th>.
def to_html_head_cell
  wrap_as_element('th')
end

# Renders a body cell: <th> (copying the scope attribute) when the cell has a
# :scope attribute, <td> otherwise.
def to_html_cell
  return wrap_as_element('th', [:scope]) if @attributes[:scope]
  wrap_as_element('td')
end
# Renders an entity as a text node.
#
# A name found in the entity table with a numeric HTML code is replaced by
# that number, and "apos" is always emitted as 39 (Internet Explorer does not
# know &apos;). Numbers become raw "&#N;" text; any other name becomes "&name;".
#
# The numeric check uses Integer instead of Fixnum: Fixnum is deprecated
# since Ruby 2.4 and removed in 3.2, while Integer is its superclass on older
# versions, so the test is true for the same values everywhere.
def to_html_entity
  MaRuKu::Out::Latex.need_entity_table
  entity_name = self.entity_name
  if (e = MaRuKu::Out::Latex::ENTITY_TABLE[entity_name]) && e.html_num
    entity_name = e.html_num
  end
  # Fix for Internet Explorer
  if entity_name == 'apos'
    entity_name = 39
  end
  if entity_name.kind_of? Integer
    # Entity.new(entity_name)
    Text.new('&#%d;' % [entity_name], false, nil, true)
  else
    Text.new('&%s;' % [entity_name])
  end
end
# Renders an XML processing instruction from the node's target and code;
# a missing target or code is replaced by an empty string.
def to_html_xml_instr
  REXML::Instruction.new(self.target || '', self.code || '')
end
# Renders every child of this node; returns the flat Array of results.
def children_to_html
  array_to_html @children
end
# Renders each item of `array` and returns the results as one flat Array.
#
# MDElement items are rendered through "to_html_<node_type>", anything else
# through "to_html". Items that do not respond to their method are skipped
# silently. A method returning an Array contributes all of its items; a
# method returning nil raises a RuntimeError.
def array_to_html(array)
  array.inject([]) do |rendered, child|
    renderer = child.kind_of?(MDElement) ? "to_html_#{child.node_type}" : "to_html"
    next rendered unless child.respond_to?(renderer)
    html = child.send(renderer)
    if html.nil?
      raise "Nil html created by method #{renderer}:\n#{html.inspect}\n"+
        " for object #{child.inspect[0,300]}"
    end
    html.kind_of?(Array) ? rendered + html : rendered << html
  end
end
# Reference definitions produce no output, in HTML as in LaTeX.
def to_html_ref_definition
  []
end

def to_latex_ref_definition
  []
end
end # HTML
end # out
end # MaRuKu

View File

@ -0,0 +1,538 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu
class MDDocument
# Encoding-related preamble lines used by to_latex_document when the
# document sets the `latex_cjk` attribute.
Latex_preamble_enc_cjk =
"\\usepackage[C40]{fontenc}
\\usepackage[cjkjis]{ucs}
\\usepackage[utf8x]{inputenc}"
# Encoding-related preamble lines used by to_latex_document otherwise.
Latex_preamble_enc_utf8 =
"\\usepackage{ucs}
\\usepackage[utf8x]{inputenc}"
# Records that LaTeX package +p+ is needed, ignoring duplicates.
def latex_require_package(p)
  packages = self.latex_required_packages
  packages.push p unless packages.include? p
end
# Render as a LaTeX fragment (the rendered children, without a preamble).
def to_latex
  fragment = children_to_latex
  fragment
end
=begin maruku_doc
Attribute: maruku_signature
Scope: document
Output: html, latex
Summary: Enables Maruku's signature.
Default: true
If false, Maruku does not append a signature to the
generated file.
=end
# Render as a complete LaTeX document
#
# Returns a String: an `article` preamble (encoding packages, packages
# requested through latex_require_package, a fixed set of packages, an
# optional user preamble) followed by the rendered body.
def to_latex_document
# The body is rendered before the package list is built, so packages
# requested while rendering end up in latex_required_packages in time.
body = to_latex
if get_setting(:maruku_signature)
body += render_latex_signature
end
# One \usepackage line per required package.
required =
self.latex_required_packages.map {|p|
"\\usepackage{#{p}}\n"
}.join
=begin maruku_doc
Attribute: latex_cjk
Scope: document
Output: latex
Summary: Support for CJK characters.
If the `latex_cjk` attribute is specified, then appropriate headers
are added to the LaTeX preamble to support Japanese fonts.
You have to have these fonts installed -- and this can be a pain.
If `latex_cjk` is specified, this is added to the preamble:
<?mrk md_codeblock(Maruku::MDDocument::Latex_preamble_enc_cjk) ?>
while the default is to add this:
<?mrk md_codeblock(Maruku::MDDocument::Latex_preamble_enc_utf8) ?>
=end
encoding = @doc.attributes[:latex_cjk] ?
Latex_preamble_enc_cjk : Latex_preamble_enc_utf8
=begin maruku_doc
Attribute: latex_preamble
Scope: document
Output: latex
Summary: User-defined preamble.
If the `latex_preamble` attribute is specified, then its value
will be used as a custom preamble.
For example:
Title: My document
Latex preamble: preamble.tex
will produce:
...
\input{preamble.tex}
...
=end
# Empty unless the document names a preamble file to \input.
user_preamble = (file = @doc.attributes[:latex_preamble]) ?
"\\input{#{file}}\n" : ""
"\\documentclass{article}
% Packages required to support encoding
#{encoding}
% Packages required by code
#{required}
% Packages always used
\\usepackage{hyperref}
\\usepackage{xspace}
\\usepackage[usenames,dvipsnames]{color}
\\usepackage[margin=1in]{geometry}
\\hypersetup{colorlinks=true,urlcolor=blue}
#{user_preamble}
\\begin{document}
#{body}
\\end{document}
"
end
# LaTeX for the "Created by Maruku" footer, including nice_date.
def render_latex_signature
  lines = [
    "\\vfill",
    "\\hrule",
    "\\vspace{1.2mm}",
    "\\begin{tiny}",
    "Created by \\href{http://maruku.rubyforge.org}{Maruku} #{self.nice_date}.",
    "\\end{tiny}"
  ]
  lines.join("\n")
end
end end
module MaRuKu; module Out; module Latex
# A horizontal rule with half an em of vertical space on either side.
def to_latex_hrule
  "\n\\vspace{.5em} \\hrule \\vspace{.5em}\n"
end
# A forced line break.
def to_latex_linebreak
  "\\linebreak "
end
# A paragraph is its rendered children followed by a blank line.
def to_latex_paragraph
  "#{children_to_latex}\n\n"
end
=begin maruku_doc
Title: Input format for colors
Output: latex, html
Related: code_background_color
Admissible formats:
green
#abc
#aabbcc
=end
# Builds a LaTeX color command for color spec +s+.
#
#   "#aabbcc" / "#abc"  =>  \<command>[rgb]{r,g,b}  (components in 0.0-1.0)
#   anything else       =>  \<command>{s}           (e.g. a named color)
#
# +command+ is the macro name to emit ('color' by default).
def latex_color(s, command='color')
  case s
  when /^\#(\w\w)(\w\w)(\w\w)$/
    # two hex digits per channel: 0-255
    scale = 255.0
    channels = [$1, $2, $3]
  when /^\#(\w)(\w)(\w)$/
    # one hex digit per channel: 0-15
    scale = 15.0
    channels = [$1, $2, $3]
  else
    return "\\#{command}{#{s}}"
  end
  rgb = channels.map { |digits| digits.hex / scale }
  "\\#{command}[rgb]{%0.2f,%0.2f,%0.2f}" % rgb
end
# Renders a code block: a `listings` environment when the document sets
# `latex_use_listings`, a plain `verbatim` environment otherwise.
def to_latex_code
  raw_code = self.raw_code
=begin maruku_doc
Attribute: latex_use_listings
Scope: document
Output: latex
Summary: Support for `listings` package.
Related: code_show_spaces, code_background_color, lang, code_lang
If the `latex_use_listings` attribute is specified, then
code block are rendered using the `listings` package.
Otherwise, a standard `verbatim` environment is used.
* If the `lang` attribute for the code block has been specified,
it gets passed to the `listings` package using the `lstset` macro.
The default lang for code blocks is specified through
the `code_lang` attribute.
\lstset{language=ruby}
Please refer to the documentation of the `listings` package for
supported languages.
If a language is not supported, the `listings` package will emit
a warning during the compilation. Just press enter and nothing
wrong will happen.
* If the `code_show_spaces` is specified, than spaces and tabs will
be shown using the macro:
\lstset{showspaces=true,showtabs=true}
* The background color is given by `code_background_color`.
=end
  unless @doc.attributes[:latex_use_listings]
    return "\\begin{verbatim}#{raw_code}\\end{verbatim}\n"
  end

  @doc.latex_require_package('listings')
  visible = get_setting(:code_show_spaces) ? 'true' : 'false'
  color = latex_color get_setting(:code_background_color)
  # '{}' is the fallback language, so a language line is always emitted.
  lang = self.attributes[:lang] || @doc.attributes[:code_lang] || '{}'
  settings = [
    "\\lstset{columns=fixed,frame=shadowbox}",
    "\\lstset{showspaces=#{visible},showtabs=#{visible}}\n",
    "\\lstset{backgroundcolor=#{color}}\n",
    "\\lstset{basicstyle=\\ttfamily\\footnotesize}\n",
    "\\lstset{language=#{lang}}\n"
  ].join
  "#{settings}\n\\begin{lstlisting}\n#{raw_code}\n\\end{lstlisting}"
end
# Header level => LaTeX sectioning command; to_latex_header falls back to
# 'paragraph' for levels not listed here.
TexHeaders = {
1=>'section',
2=>'subsection',
3=>'subsubsection',
4=>'paragraph'}
# Renders a header as a starred (unnumbered) sectioning command.
#
# The command is looked up in TexHeaders by level ('paragraph' for unknown
# levels). If section_number returns a value it is prepended to the title.
# When the header has an :id attribute, a \hypertarget and a \label with
# that id (minus a leading '#') are emitted as well.
def to_latex_header
  h = TexHeaders[self.level] || 'paragraph'
  title = children_to_latex
  if number = section_number
    title = number + title
  end
  if id = self.attributes[:id]
    # drop '#' at the beginning
    if id[0,1] == '#' then id = id[1,id.size] end
    %{\\hypertarget{%s}{}\\%s*{{%s}}\\label{%s}\n\n} % [ id, h, title, id ]
  else
    %{\\%s*{%s}\n\n} % [ h, title]
  end
end
# An unordered list is an itemize environment, unless it carries the :toc
# attribute, in which case the document's table of contents is rendered.
def to_latex_ul
  return @doc.toc.to_latex if self.attributes[:toc]
  wrap_as_environment('itemize')
end
# A block quote is a `quote` environment.
def to_latex_quote
  wrap_as_environment('quote')
end
# An ordered list is an `enumerate` environment.
def to_latex_ol
  wrap_as_environment('enumerate')
end
# A list item: \item followed by the rendered children and a newline.
def to_latex_li
  content = children_to_latex
  "\\item #{content}\n"
end
# A span-style list item; rendered exactly like to_latex_li.
def to_latex_li_span
  content = children_to_latex
  "\\item #{content}\n"
end
# Strong emphasis is set in bold.
def to_latex_strong
  inner = children_to_latex
  "\\textbf{#{inner}}"
end
# Emphasis uses \emph.
def to_latex_emphasis
  inner = children_to_latex
  "\\emph{#{inner}}"
end
# Wraps the rendered children in a TeX group that starts with +c+
# (for example a font-switching command).
def wrap_as_span(c)
  inner = children_to_latex
  "{#{c} #{inner}}"
end
# Wraps the rendered children in \begin{name} ... \end{name}.
def wrap_as_environment(name)
  inner = children_to_latex
  # The '%' after \begin{...} comments out the line break in the TeX source.
  "\\begin{#{name}}%\n" + inner + "\n\\end{#{name}}\n"
end
# Byte values that may be copied to LaTeX unchanged: space, a-z and A-Z.
# Kept as integers so the lookup matches what String#each_byte yields on
# every Ruby version (character literals such as ?a are integers only on
# Ruby 1.8, where this set has exactly the same contents as before).
SAFE_CHARS = Set.new([32] + (97..122).to_a + (65..90).to_a)
# the ultimate escaping
# (is much better than using \verb)
#
# Returns +source+ with every space turned into '~', ASCII letters kept and
# every other byte written as \char<decimal byte value>.
def latex_escape(source)
  s = ""
  source.each_byte do |b|
    if b == 32 # space
      s << '~'
    elsif SAFE_CHARS.include? b
      s << b.chr
    else
      s << ("\\char%d" % b)
    end
  end
  s
end
# Inline code: the escaped source in typewriter type inside a \colorbox
# whose color comes from the code_background_color setting.
def to_latex_inline_code
  # Convert to printable latex chars
  escaped = latex_escape(self.raw_code)
  background = get_setting(:code_background_color)
  box = latex_color(background, 'colorbox')
  "#{box}{\\tt #{escaped}}"
end
# Renders an autolink (<http://...>) whose visible text is the URL itself.
#
# A leading "mailto:" is hidden from the visible text, which is then
# escaped with latex_escape. URLs starting with '#' become \hyperlink
# references to an internal target; everything else becomes \href.
def to_latex_immediate_link
  url = self.url
  text = url.gsub(/^mailto:/,'') # don't show mailto
  # gsub('~','$\sim$')
  text = latex_escape(text)
  if url[0,1] == '#'
    url = url[1,url.size]
    return "\\hyperlink{#{url}}{#{text}}"
  else
    return "\\href{#{url}}{#{text}}"
  end
end
# Renders an inline link: \hyperlink for URLs starting with '#',
# \href for everything else; the link text is the rendered children.
def to_latex_im_link
  target = self.url
  unless target[0,1] == '#'
    return "\\href{#{target}}{#{children_to_latex}}"
  end
  target = target[1,target.size]
  "\\hyperlink{#{target}}{#{children_to_latex}}"
end
# Renders a reference-style link.
#
# The reference id defaults to the downcased link text when empty. If the
# document has no such reference, a message is written to $stderr and only
# the link text is returned.
def to_latex_link
  id = self.ref_id
  # if empty, use text
  id = children.to_s.downcase if id.size == 0
  ref = @doc.refs[id]
  unless ref
    $stderr.puts "Could not find id = '#{id}'"
    return children_to_latex
  end
  url = ref[:url]
  #title = ref[:title] || 'no title'
  unless url[0,1] == '#'
    return "\\href{#{url}}{#{children_to_latex}}"
  end
  url = url[1,url.size]
  "\\hyperlink{#{url}}{#{children_to_latex}}"
end
# An e-mail address becomes a mailto: \href showing the escaped address.
def to_latex_email_address
  address = self.email
  shown = latex_escape(address)
  "\\href{mailto:#{address}}{#{shown}}"
end
# Renders a table as a `tabular` environment.
#
# The first `align.size` children form the header row, separated from the
# body by \hline; the remaining children are grouped into rows of the same
# width. Column alignment comes from `align`.
def to_latex_table
  columns = self.align
  width = columns.size
  header = @children.slice(0, width)
  body = []
  pos = width
  while pos<@children.size
    body << @children.slice(pos, width)
    pos += width
  end
  letters = {:center=>'c',:left=>'l',:right=>'r'}
  spec = columns.map{|a| letters[a]}.join('|')
  row_end = "\\\\" + "\n"
  out = "\\begin{tabular}{#{spec}}\n"
  out += array_to_latex(header, '&') + row_end
  out += "\\hline \n"
  body.each do |cells|
    out += array_to_latex(cells, '&') + row_end
  end
  # the trailing blank line puts the table in its own paragraph
  out + "\\end{tabular}" + "\n\n"
end
# A header cell is just its rendered children.
def to_latex_head_cell
  children_to_latex
end
# A body cell is just its rendered children.
def to_latex_cell
  children_to_latex
end
# Renders a footnote reference as an inline \footnote{...}.
#
# If the document has no footnote with this id, a message is written to
# $stderr and an empty string is returned, so the caller never sees nil.
def to_latex_footnote_reference
  id = self.footnote_id
  f = @doc.footnotes[id]
  if f
    "\\footnote{#{f.children_to_latex.strip}} "
  else
    $stderr.puts "Could not find footnote '#{id}'"
    ""
  end
end
# Raw HTML is dropped from the LaTeX output.
def to_latex_raw_html
  #'{\bf Raw HTML removed in latex version }'
  ''
end
## Definition lists ###
# A definition list is a `description` environment around the children.
def to_latex_definition_list
  "\\begin{description}\n" + children_to_latex + "\\end{description}\n"
end
# One definition entry: an \item[...] per term, followed by the text of
# every definition.
def to_latex_definition
  terms = self.terms
  definitions = self.definitions
  labels = terms.map { |t| "\n\\item[#{t.children_to_latex}] " }
  bodies = definitions.map { |d| "#{d.children_to_latex} \n" }
  (labels + bodies).join
end
# Abbreviations get no special markup: only the children are rendered.
def to_latex_abbr
  rendered = children_to_latex
  rendered
end
# Images are not supported: a bold placeholder naming the (escaped) URL is
# emitted. A missing reference is reported on $stderr and renders as "".
def to_latex_image
  id = self.ref_id
  ref = @doc.refs[id]
  unless ref
    $stderr.puts "Could not find id = '#{id}'"
    return ""
  end
  "{\\bf Images not supported yet (#{latex_escape(ref[:url])})}"
end
# Renders this node's children to LaTeX through array_to_latex.
def children_to_latex
  kids = @children
  array_to_latex(kids)
end
# Renders every item of +array+ to LaTeX and joins the pieces with
# +join_char+.
#
# MDElement items are dispatched to "to_latex_<node_type>", anything else
# to "to_latex". Items without a matching renderer are skipped; a renderer
# returning nil raises. Array results are spliced into the output.
def array_to_latex(array, join_char='')
  pieces = []
  array.each do |node|
    method = node.kind_of?(MDElement) ?
      "to_latex_#{node.node_type}" : "to_latex"
    # No renderer for this item: skip it silently.
    next unless node.respond_to?(method)

    h = node.send(method)
    if h.nil?
      raise "Nil html for #{node.inspect} created with method #{method}"
    end
    if h.kind_of?(Array)
      pieces.concat(h)
    else
      pieces << h
    end
  end
  # A piece ending in a command name (\foo) would swallow the space that
  # starts the next piece, so an explicit "\ " is appended in that case.
  pieces.each_index do |i|
    next unless pieces[i] =~ /\\\w+\s*$/ # command
    following = pieces[i+1]
    if following && following[0, 1] == ' '
      pieces[i] = pieces[i] + "\\ "
    end
  end
  pieces.join(join_char)
end
end end end # MaRuKu::Out::Latex

View File

@ -0,0 +1,367 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'rexml/document'
module MaRuKu; module Out; module Latex
include REXML
# Renders an entity node using the entity table.
#
# Unknown entities are reported through maruku_error and render as "".
# For known entities the LaTeX packages they need are registered on the
# document; a replacement starting with a backslash gets a trailing space.
def to_latex_entity
  MaRuKu::Out::Latex.need_entity_table
  name = self.entity_name
  entity = ENTITY_TABLE[name]
  unless entity
    maruku_error "I don't know how to translate entity '#{name}' "+
      "to LaTeX."
    return ""
  end
  latex = entity.latex_string
  entity.latex_packages.each { |pkg| @doc.latex_require_package pkg }
  latex = latex + " " if latex =~ /^\\/
  return latex if latex
  tell_user "Cannot translate entity #{name.inspect} to LaTeX."
  name
end
# One row of the entity table: the numeric HTML code, the entity name, the
# LaTeX replacement text and the LaTeX packages that replacement needs.
# NOTE(review): safe_attr_accessor is defined elsewhere; presumably it
# declares an accessor restricted to the given class -- confirm.
# NOTE(review): Fixnum no longer exists on Ruby >= 3.2; switching to
# Integer depends on how safe_attr_accessor checks types.
class LatexEntity
safe_attr_accessor :html_num, Fixnum
safe_attr_accessor :html_entity, String
safe_attr_accessor :latex_string, String
safe_attr_accessor :latex_packages, Array
end
# Builds the entity table on first use (that is, while it is still empty).
def Latex.need_entity_table
  return unless ENTITY_TABLE.empty?
  Latex.init_entity_table
end
# Fills ENTITY_TABLE from XML_TABLE. Every <char> element becomes one
# LatexEntity, stored under both its numeric code and its name.
# Progress is reported on $stderr.
def Latex.init_entity_table
  $stderr.write "Creating entity table.."
  $stderr.flush
  # Placeholders used in XML_TABLE for characters that are awkward to
  # write inside the attribute values. Order matters: @DOUBLEQUOT has to
  # be replaced before its suffix @QUOT.
  placeholders = [
    [/@DOUBLEQUOT/, '"'],
    [/@QUOT/, "'"],
    [/@GT/, ">"],
    [/@LT/, "<"],
    [/@AMP/, "&"]
  ]
  doc = Document.new XML_TABLE
  doc.elements.each("//char") do |c|
    num = c.attributes['num'].to_i
    name = c.attributes['name']
    package = c.attributes['package']
    convert = c.attributes['convertTo']
    placeholders.each do |pattern, char|
      convert.gsub!(pattern, char)
    end
    convert.freeze
    entity = LatexEntity.new
    entity.html_num = num
    entity.html_entity = name
    entity.latex_string = convert
    entity.latex_packages = package ? package.split : []
    ENTITY_TABLE[num] = entity
    ENTITY_TABLE[name] = entity
  end
  $stderr.puts "..done."
end
ENTITY_TABLE = {}
# The following is a conversion chart for html elements, courtesy of
# text2html
#
# Each <char> gives the numeric HTML code (num), the entity name (name),
# the LaTeX replacement (convertTo) and, optionally, the LaTeX packages the
# replacement needs (package). @QUOT, @DOUBLEQUOT, @LT, @GT and @AMP are
# placeholders expanded by Latex.init_entity_table.
# XML comments must not contain "--", so remarks inside commented-out
# entries are written in parentheses.
XML_TABLE ="
<chars>
<char num='913' name='Alpha' convertTo='$A$' />
<char num='914' name='Beta' convertTo='$B$' />
<char num='915' name='Gamma' convertTo='$\\Gamma$' />
<char num='916' name='Delta' convertTo='$\\Delta$' />
<char num='917' name='Epsilon' convertTo='$E$' />
<char num='918' name='Zeta' convertTo='$Z$' />
<char num='919' name='Eta' convertTo='$H$' />
<char num='920' name='Theta' convertTo='$\\Theta$' />
<char num='921' name='Iota' convertTo='$I$' />
<char num='922' name='Kappa' convertTo='$K$' />
<char num='923' name='Lambda' convertTo='$\\Lambda$' />
<char num='924' name='Mu' convertTo='$M$' />
<char num='925' name='Nu' convertTo='$N$' />
<char num='926' name='Xi' convertTo='$\\Xi$' />
<char num='927' name='Omicron' convertTo='$O$' />
<char num='928' name='Pi' convertTo='$\\Pi$' />
<char num='929' name='Rho' convertTo='$P$' />
<char num='931' name='Sigma' convertTo='$\\Sigma$' />
<char num='932' name='Tau' convertTo='$T$' />
<char num='933' name='Upsilon' convertTo='$Y$' />
<char num='934' name='Phi' convertTo='$\\Phi$' />
<char num='935' name='Chi' convertTo='$X$' />
<char num='936' name='Psi' convertTo='$\\Psi$' />
<char num='937' name='Omega' convertTo='$\\Omega$' />
<char num='945' name='alpha' convertTo='$\\alpha$' />
<char num='946' name='beta' convertTo='$\\beta$' />
<char num='947' name='gamma' convertTo='$\\gamma$' />
<char num='948' name='delta' convertTo='$\\delta$' />
<char num='949' name='epsilon' convertTo='$\\epsilon$' />
<char num='950' name='zeta' convertTo='$\\zeta$' />
<char num='951' name='eta' convertTo='$\\eta$' />
<char num='952' name='theta' convertTo='$\\theta$' />
<char num='953' name='iota' convertTo='$\\iota$' />
<char num='954' name='kappa' convertTo='$\\kappa$' />
<char num='955' name='lambda' convertTo='$\\lambda$' />
<char num='956' name='mu' convertTo='$\\mu$' />
<char num='957' name='nu' convertTo='$\\nu$' />
<char num='958' name='xi' convertTo='$\\xi$' />
<char num='959' name='omicron' convertTo='$o$' />
<char num='960' name='pi' convertTo='$\\pi$' />
<char num='961' name='rho' convertTo='$\\rho$' />
<char num='963' name='sigma' convertTo='$\\sigma$' />
<char num='964' name='tau' convertTo='$\\tau$' />
<char num='965' name='upsilon' convertTo='$\\upsilon$' />
<char num='966' name='phi' convertTo='$\\phi$' />
<char num='967' name='chi' convertTo='$\\chi$' />
<char num='968' name='psi' convertTo='$\\psi$' />
<char num='969' name='omega' convertTo='$\\omega$' />
<char num='962' name='sigmaf' convertTo='$\\varsigma$' />
<char num='977' name='thetasym' convertTo='$\\vartheta$' />
<char num='982' name='piv' convertTo='$\\varpi$' />
<char num='8230' name='hellip' convertTo='\\ldots' />
<char num='8242' name='prime' convertTo='$\\prime$' />
<char num='8254' name='oline' convertTo='-' />
<char num='8260' name='frasl' convertTo='/' />
<char num='8472' name='weierp' convertTo='$\\wp$' />
<char num='8465' name='image' convertTo='$\\Im$' />
<char num='8476' name='real' convertTo='$\\Re$' />
<char num='8501' name='alefsym' convertTo='$\\aleph$' />
<char num='8226' name='bull' convertTo='$\\bullet$' />
<char num='8482' name='trade' convertTo='$^{\\rm TM}$' /> <!-- \\texttrademark -->
<char num='8592' name='larr' convertTo='$\\leftarrow$' />
<char num='8594' name='rarr' convertTo='$\\rightarrow$' />
<char num='8593' name='uarr' convertTo='$\\uparrow$' />
<char num='8595' name='darr' convertTo='$\\downarrow$' />
<char num='8596' name='harr' convertTo='$\\leftrightarrow$' />
<char num='8629' name='crarr' convertTo='$\\hookleftarrow$' />
<char num='8657' name='uArr' convertTo='$\\Uparrow$' />
<char num='8659' name='dArr' convertTo='$\\Downarrow$' />
<char num='8656' name='lArr' convertTo='$\\Leftarrow$' />
<char num='8658' name='rArr' convertTo='$\\Rightarrow$' />
<char num='8660' name='hArr' convertTo='$\\Leftrightarrow$' />
<char num='8704' name='forall' convertTo='$\\forall$' />
<char num='8706' name='part' convertTo='$\\partial$' />
<char num='8707' name='exist' convertTo='$\\exists$' />
<char num='8709' name='empty' convertTo='$\\emptyset$' />
<char num='8711' name='nabla' convertTo='$\\nabla$' />
<char num='8712' name='isin' convertTo='$\\in$' />
<char num='8715' name='ni' convertTo='$\\ni$' />
<char num='8713' name='notin' convertTo='$\\notin$' />
<char num='8721' name='sum' convertTo='$\\sum$' />
<char num='8719' name='prod' convertTo='$\\prod$' />
<char num='8722' name='minus' convertTo='$-$' />
<char num='8727' name='lowast' convertTo='$\\ast$' />
<char num='8730' name='radic' convertTo='$\\surd$' />
<char num='8733' name='prop' convertTo='$\\propto$' />
<char num='8734' name='infin' convertTo='$\\infty$' />
<char num='8736' name='ang' convertTo='$\\angle$' />
<char num='8743' name='and' convertTo='$\\wedge$' />
<char num='8744' name='or' convertTo='$\\vee$' />
<char num='8745' name='cap' convertTo='$\\cap$' />
<char num='8746' name='cup' convertTo='$\\cup$' />
<char num='8747' name='int' convertTo='$\\int$' />
<char num='8756' name='there4' convertTo='$\\therefore$' package='amssymb' /> <!-- only AMS -->
<char num='8764' name='sim' convertTo='$\\sim$' />
<char num='8776' name='asymp' convertTo='$\\approx$' />
<char num='8773' name='cong' convertTo='$\\cong$' />
<char num='8800' name='ne' convertTo='$\\neq$' />
<char num='8801' name='equiv' convertTo='$\\equiv$' />
<char num='8804' name='le' convertTo='$\\leq$' />
<char num='8805' name='ge' convertTo='$\\geq$' />
<char num='8834' name='sub' convertTo='$\\subset$' />
<char num='8835' name='sup' convertTo='$\\supset$' />
<!-- <char num='8838' name='sube' convertTo='$\\subseteq$' />-->
<char num='8839' name='supe' convertTo='$\\supseteq$' />
<!-- <char num='8836' name='nsub' convertTo='$\\nsubset$' /> (only AMS) -->
<char num='8853' name='oplus' convertTo='$\\oplus$' />
<char num='8855' name='otimes' convertTo='$\\otimes$' />
<char num='8869' name='perp' convertTo='$\\perp$' />
<char num='8901' name='sdot' convertTo='$\\cdot$' />
<char num='8968' name='lceil' convertTo='$\\lceil$' />
<char num='8969' name='rceil' convertTo='$\\rceil$' />
<char num='8970' name='lfloor' convertTo='$\\lfloor$' />
<char num='8971' name='rfloor' convertTo='$\\rfloor$' />
<char num='9001' name='lang' convertTo='$\\langle$' />
<char num='9002' name='rang' convertTo='$\\rangle$' />
<char num='9674' name='loz' convertTo='$\\lozenge$' package='amssymb' /> <!-- only AMS -->
<char num='9824' name='spades' convertTo='$\\spadesuit$' />
<char num='9827' name='clubs' convertTo='$\\clubsuit$' />
<char num='9829' name='hearts' convertTo='$\\heartsuit$' />
<char num='9830' name='diams' convertTo='$\\diamondsuit$' />
<char num='38' name='amp' convertTo='\\@AMP' />
<!-- <char num='34' name='quot' convertTo='\\@DOUBLEQUOT' /> XXX -->
<char num='34' name='quot' convertTo='\"' />
<char num='39' name='apos' convertTo=\"'\" />
<char num='169' name='copy' convertTo='\\copyright' />
<char num='60' name='lt' convertTo='$@LT$' />
<char num='62' name='gt' convertTo='$@GT$' />
<char num='338' name='OElig' convertTo='\\OE' />
<char num='339' name='oelig' convertTo='\\oe' />
<char num='352' name='Scaron' convertTo='\\v{S}' />
<char num='353' name='scaron' convertTo='\\v{s}' />
<char num='376' name='Yuml' convertTo='\\\"Y' />
<char num='710' name='circ' convertTo='\\textasciicircum' />
<char num='732' name='tilde' convertTo='\\textasciitilde' />
<char num='8211' name='ndash' convertTo='--' />
<char num='8212' name='mdash' convertTo='---' />
<char num='8216' name='lsquo' convertTo='`' />
<char num='8217' name='rsquo' convertTo=\"'\" /> <!-- XXXX -->
<char num='8220' name='ldquo' convertTo='``' />
<char num='8221' name='rdquo' convertTo=\"''\" /> <!-- XXXX -->
<char num='8224' name='dagger' convertTo='\\dag' />
<char num='8225' name='Dagger' convertTo='\\ddag' />
<char num='8240' name='permil' convertTo='\\permil' package='wasysym' /> <!-- wasysym package -->
<char num='8364' name='euro' convertTo='\\euro' package='eurosym' /> <!-- eurosym package -->
<char num='8249' name='lsaquo' convertTo='\\guilsinglleft' package='aeguill'/>
<char num='8250' name='rsaquo' convertTo='\\guilsinglright' package='aeguill' />
<!-- <char num='160' name='nbsp' convertTo='\\nolinebreak' />-->
<char num='160' name='nbsp' convertTo='~' />
<char num='161' name='iexcl' convertTo='\\textexclamdown' />
<char num='163' name='pound' convertTo='\\pounds' />
<char num='164' name='curren' convertTo='\\currency' package='wasysym' /> <!-- wasysym package -->
<char num='165' name='yen' convertTo='\\textyen' package='textcomp'/> <!-- textcomp -->
<char num='166' name='brvbar' convertTo='\\brokenvert' package='wasysym' /> <!-- wasysym -->
<char num='167' name='sect' convertTo='\\S' />
<char num='171' name='laquo' convertTo='\\guillemotleft' package='aeguill'/>
<char num='187' name='raquo' convertTo='\\guillemotright' package='aeguill'/>
<char num='174' name='reg' convertTo='\\textregistered' />
<char num='170' name='ordf' convertTo='\\textordfeminine' />
<char num='172' name='not' convertTo='$\\neg$' />
<!-- <char num='176' name='deg' convertTo='$\\degree$' /> (mathabx) -->
<char num='176' name='deg' convertTo='\\textdegree' package='textcomp'/>
<char num='177' name='plusmn' convertTo='$\\pm$' />
<char num='180' name='acute' convertTo='@QUOT' />
<char num='181' name='micro' convertTo='$\\mu$' />
<char num='182' name='para' convertTo='\\P' />
<char num='183' name='middot' convertTo='$\\cdot$' />
<char num='186' name='ordm' convertTo='\\textordmasculine' />
<char num='162' name='cent' convertTo='\\cent' package='wasysym' />
<char num='185' name='sup1' convertTo='$^1$' />
<char num='178' name='sup2' convertTo='$^2$' />
<char num='179' name='sup3' convertTo='$^3$' />
<char num='189' name='frac12' convertTo='$\\frac{1}{2}$' />
<char num='188' name='frac14' convertTo='$\\frac{1}{4}$' />
<char num='190' name='frac34' convertTo='$\\frac{3}{4}$' />
<char num='192' name='Agrave' convertTo='\\`A' />
<char num='193' name='Aacute' convertTo='\\@QUOTA' />
<char num='194' name='Acirc' convertTo='\\^A' />
<char num='195' name='Atilde' convertTo='\\~A' />
<char num='196' name='Auml' convertTo='\\@DOUBLEQUOTA' />
<char num='197' name='Aring' convertTo='\\AA' />
<char num='198' name='AElig' convertTo='\\AE' />
<char num='199' name='Ccedil' convertTo='\\c{C}' />
<char num='200' name='Egrave' convertTo='\\`E' />
<char num='201' name='Eacute' convertTo='\\@QUOTE' />
<char num='202' name='Ecirc' convertTo='\\^E' />
<char num='203' name='Euml' convertTo='\\@DOUBLEQUOTE' />
<char num='204' name='Igrave' convertTo='\\`I' />
<char num='205' name='Iacute' convertTo='\\@QUOTI' />
<char num='206' name='Icirc' convertTo='\\^I' />
<char num='207' name='Iuml' convertTo='\\\"I' />
<char num='208' name='ETH' convertTo='$\\eth$' package='amssymb' /> <!-- AMS -->
<char num='209' name='Ntilde' convertTo='\\~N' />
<char num='210' name='Ograve' convertTo='\\`O' />
<char num='211' name='Oacute' convertTo='\\@QUOT O' />
<char num='212' name='Ocirc' convertTo='\\^O' />
<char num='213' name='Otilde' convertTo='\\~O' />
<char num='214' name='Ouml' convertTo='\\@DOUBLEQUOTO' />
<char num='215' name='times' convertTo='$\\times$' />
<char num='216' name='Oslash' convertTo='\\O' />
<char num='217' name='Ugrave' convertTo='\\`U' />
<char num='218' name='Uacute' convertTo='\\@QUOTU' />
<char num='219' name='Ucirc' convertTo='\\^U' />
<char num='220' name='Uuml' convertTo='\\@DOUBLEQUOTU' />
<char num='221' name='Yacute' convertTo='\\@QUOTY' />
<char num='223' name='szlig' convertTo='\\ss' />
<char num='224' name='agrave' convertTo='\\`a' />
<char num='225' name='aacute' convertTo='\\@QUOTa' />
<char num='226' name='acirc' convertTo='\\^a' />
<char num='227' name='atilde' convertTo='\\~a' />
<char num='228' name='auml' convertTo='\\@DOUBLEQUOTa' />
<char num='229' name='aring' convertTo='\\aa' />
<char num='230' name='aelig' convertTo='\\ae' />
<char num='231' name='ccedil' convertTo='\\c{c}' />
<char num='232' name='egrave' convertTo='\\`e' />
<char num='233' name='eacute' convertTo='\\@QUOTe' />
<char num='234' name='ecirc' convertTo='\\^e' />
<char num='235' name='euml' convertTo='\\@DOUBLEQUOTe' />
<char num='236' name='igrave' convertTo='\\`i' />
<char num='237' name='iacute' convertTo='\\@QUOTi' />
<char num='238' name='icirc' convertTo='\\^i' />
<char num='239' name='iuml' convertTo='\\@DOUBLEQUOTi' />
<char num='240' name='eth' convertTo='$\\eth$' package='amssymb'/> <!-- -->
<char num='241' name='ntilde' convertTo='\\~n' />
<char num='242' name='ograve' convertTo='\\`o' />
<char num='243' name='oacute' convertTo='\\@QUOTo' />
<char num='244' name='ocirc' convertTo='\\^o' />
<char num='245' name='otilde' convertTo='\\~o' />
<char num='246' name='ouml' convertTo='\\@DOUBLEQUOTo' />
<!-- <char num='247' name='divide' convertTo='$\\divide$' /> -->
<char num='248' name='oslash' convertTo='\\o' />
<char num='249' name='ugrave' convertTo='\\`u' />
<char num='250' name='uacute' convertTo='\\@QUOTu' />
<char num='251' name='ucirc' convertTo='\\^u' />
<char num='252' name='uuml' convertTo='\\@DOUBLEQUOTu' />
<char num='253' name='yacute' convertTo='\\@QUOTy' />
<char num='255' name='yuml' convertTo='\\@DOUBLEQUOTy' />
<char num='222' name='THORN' convertTo='\\Thorn' package='wasysym' />
<char num='254' name='thorn' convertTo='\\thorn' package='wasysym' />
</chars>"
end end end

View File

@ -0,0 +1,64 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
class String
  # These are TeX's special characters
  # (kept as byte values, which is what the ?x literals evaluate to on
  # Ruby 1.8, so the lookups behave the same on every Ruby version)
  LATEX_ADD_SLASH = '{}$&#_%'.unpack('C*')
  # These, we transform to {\tt \char<ascii code>}
  LATEX_TO_CHARCODE = '^~><'.unpack('C*')

  # Returns +s+ with TeX's special characters escaped:
  # * ^ ~ > <        become {\tt \char<code>}
  # * { } $ & # _ %  get a backslash in front
  # * \              becomes $\backslash$
  # Every other character, multi-byte ones included, is copied unchanged.
  def escape_to_latex(s)
    s.gsub(/[\^~><{}\$&\#_%\\]/) do |c|
      code = c.unpack('C').first
      if LATEX_TO_CHARCODE.include? code
        "{\\tt \\char#{code}}"
      elsif LATEX_ADD_SLASH.include? code
        "\\" + c
      else
        # there is no backslash in cmr10 fonts
        "$\\backslash$"
      end
    end
  end

  # escapes special characters
  # and then applies the cosmetic substitutions listed in OtherGoodies
  def to_latex
    s = escape_to_latex(self)
    OtherGoodies.each do |k, v|
      s.gsub!(k, v)
    end
    s
  end

  # other things that are good on the eyes
  OtherGoodies = {
    /(\s)LaTeX/ => '\1\\LaTeX\\xspace ', # XXX not if already \LaTeX
    # 'HTML' => '\\textsc{html}\\xspace ',
    # 'PDF' => '\\textsc{pdf}\\xspace '
  }
end

View File

@ -0,0 +1,164 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
class String
  # XXX: markdown escaping
  # Plain strings are already Markdown; the argument is ignored.
  def to_md(c=nil)
    to_s
  end

  # Splits on whitespace and appends a single space to every word:
  # " andrea censi " => ["andrea ", "censi "]
  def mysplit
    split.map { |word| "#{word} " }
  end
end
module MaRuKu; module Out; module Markdown
DefaultLineLength = 40
# Default Markdown rendering: render the children with +context+.
def to_md(context={})
  rendered = children_to_md(context)
  rendered
end
# A paragraph: the children wrapped to context[:line_length] columns
# (DefaultLineLength if unset), followed by a newline.
def to_md_paragraph(context)
  width = context[:line_length] || DefaultLineLength
  wrapped = wrap(@children, width, context)
  wrapped + "\n"
end
# A list item: the children wrapped four columns narrower than the line
# length, indented through add_tabs, with the first character replaced by
# the bullet '*'.
def to_md_li_span(context)
  width = (context[:line_length] || DefaultLineLength) - 2
  wrapped = wrap(@children, width - 2, context)
  item = add_tabs(wrapped, 1, ' ')
  item[0] = ?*
  item + "\n"
end
# An abbreviation definition line: *[ABBR]: expansion
def to_md_abbr_def(context)
  "*[%s]: %s\n" % [self.abbr, self.text]
end
# An ordered list: every item is wrapped, indented through add_tabs and
# has its first four characters overwritten with its number ("1. ", ...).
# The list ends with an empty line.
def to_md_ol(context)
  width = (context[:line_length] || DefaultLineLength) - 2
  items = []
  self.children.each_with_index do |li, index|
    wrapped = wrap(li.children, width - 2, context)
    entry = add_tabs(wrapped, 1, ' ') + "\n"
    entry[0, 4] = "#{index + 1}. "[0, 4]
    items << entry
  end
  items.join + "\n"
end
# An unordered list: every item is wrapped, passed through add_indent and
# has its first character replaced by '-'. The list ends with an empty line.
def to_md_ul(context)
  width = (context[:line_length] || DefaultLineLength) - 2
  items = self.children.map do |li|
    entry = add_indent(wrap(li.children, width - 2, context))
    entry[0, 1] = "-"
    entry
  end
  items.join + "\n"
end
# Returns a copy of +s+ in which every line is prefixed with +char+.
# A trailing newline in +s+ is preserved (split drops it); +s+ itself is
# not modified.
def add_indent(s,char=" ")
  t = s.split("\n").map{|x| char+x }.join("\n")
  t << "\n" if s[-1, 1] == "\n"
  t
end
# Renders this node's children to Markdown through array_to_md.
def children_to_md(context)
  kids = @children
  array_to_md(kids, context)
end
# Renders +array+ to Markdown and breaks the text into lines.
#
# Strings are split into words; other items contribute whatever their
# to_md returns (flattened). A new line is started whenever adding the
# next piece would exceed +line_length+. :linebreak elements end the
# current line with " \n". The result always ends with a newline.
def wrap(array, line_length, context)
  result = ""
  current = ""
  array.each do |node|
    if node.kind_of?(MDElement) && node.node_type == :linebreak
      result << current.strip << " \n"
      current = ""
      next
    end
    chunks =
      if node.kind_of? String
        node.to_md.mysplit
      else
        [node.to_md(context)].flatten
      end
    chunks.each do |chunk|
      if chunk.size + current.size > line_length
        result << current.strip << "\n"
        current = ""
      end
      current << chunk
    end
  end
  result << current.strip << "\n" unless current.size == 0
  result << ?\n unless result[-1] == ?\n
  result
end
# Renders every item of +array+ to Markdown and joins the pieces with
# +join_char+.
#
# MDElement items are dispatched to "to_md_<node_type>"; items without
# such a method, and all other objects, use plain "to_md". A renderer
# returning nil raises; Array results are spliced into the output.
def array_to_md(array, context, join_char='')
  pieces = []
  array.each do |node|
    method = node.kind_of?(MDElement) ?
      "to_md_#{node.node_type}" : "to_md"
    # No specialised renderer: fall back to the generic one.
    method = 'to_md' unless node.respond_to?(method)
    h = node.send(method, context)
    if h.nil?
      raise "Nil md for #{node.inspect} created with method #{method}"
    end
    if h.kind_of?(Array)
      pieces.concat(h)
    else
      pieces << h
    end
  end
  pieces.join(join_char)
end
end end end
module MaRuKu; class MDDocument
  # The renderer that is in place when this file is loaded stays reachable
  # as old_md; to_md currently just delegates to it.
  alias old_md to_md
  def to_md(context={})
    old_md(context)
  end
end end

53
lib/maruku/output/to_s.rb Normal file
View File

@ -0,0 +1,53 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu
  class MDElement
    # Strips all formatting from the string
    def to_s
      children_to_s
    end

    # Concatenation of the children's string forms.
    def children_to_s
      @children.join
    end

    # Generate an id for headers. Assumes @children is set.
    #
    # Spaces become underscores, the text is downcased and everything that
    # is not a word character is dropped. If nothing is left, a sequential
    # id ("id1", "id2", ...) is produced from the global counter $uid.
    def generate_id
      title = children_to_s.gsub(/ /,'_').downcase.gsub(/[^\w_]/,'').strip
      if title.empty?
        $uid = ($uid || 0) + 1
        title = "id#{$uid}"
      end
      title
    end
  end
end

184
lib/maruku/string_utils.rb Normal file
View File

@ -0,0 +1,184 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
# Boring stuff with strings.
module MaRuKu
  # Small string helpers shared by the parser and the output code.
  module Strings
    # Prefixes every line of +s+ with +n+ copies of +char+.
    def add_tabs(s,n=1,char="\t")
      s.split("\n").map{|x| char*n+x }.join("\n")
    end

    # Number of columns a tab character counts for.
    TabSize = 4

    # Splits +s+ on newlines.
    def split_lines(s)
      s.split("\n")
    end

    # Parses email-style headers found at the very beginning of +s+.
    # Returns a hash.
    #
    # +hash[:data]+ is the message, i.e. what follows the header block
    # (or the whole of +s+ when it does not start with headers).
    #
    # Keys are downcased, space becomes underscore, converted to symbols.
    #
    #     My key: true
    #
    # becomes:
    #
    #     {:my_key => true}
    #
    def parse_email_headers(s)
      keys={}
      match = (s =~ /((\w[\w\s]+: .*\n)+)\n/)
      if match != 0
        # No header block at offset 0: everything is data.
        keys[:data] = s
      else
        keys[:data] = $'
        headers = $1
        headers.split("\n").each do |l|
          # Split on the first colon only, so values such as URLs
          # ("link: http://example.com") keep their own colons.
          k, v = l.split(':', 2)
          k, v = normalize_key_and_value(k, v)
          keys[k.to_sym] = v
        end
      end
      keys
    end

    # Normalizes one header pair. The key is stripped, downcased and has
    # spaces replaced by underscores (it is returned as a String). The
    # value is stripped; a missing value defaults to +true+, and
    # "yes"/"true"/"no"/"false" (any case) become booleans.
    def normalize_key_and_value(k,v)
      v = v ? v.strip : true # no value defaults to true
      k = k.strip
      # check synonyms
      v = true if ['yes','true'].include?(v.to_s.downcase)
      v = false if ['no','false'].include?(v.to_s.downcase)
      k = k.downcase.gsub(' ','_')
      return k, v
    end

    # Returns the number of leading spaces, considering that
    # a tab counts as `TabSize` spaces.
    def number_of_leading_spaces(s)
      n=0; i=0;
      while i < s.size
        c = s[i,1]
        if c == ' '
          i+=1; n+=1;
        elsif c == "\t"
          i+=1; n+=TabSize;
        else
          break
        end
      end
      n
    end

    # This returns the position of the first real char in a list item.
    #
    # For example:
    #     '*Hello'       # => 1
    #     '* Hello'      # => 2
    #     ' * Hello'     # => 3
    #     '   * Hello'   # => 5
    #     '1.Hello'      # => 2
    #     ' 1.  Hello'   # => 5
    #
    # The kind of list is taken from +s.md_type+; for anything that is
    # neither :ulist nor :olist the user is warned and 0 is returned.
    def spaces_before_first_char(s)
      case s.md_type
      when :ulist
        i=0;
        # skip whitespace if present
        while s[i,1] =~ /\s/; i+=1 end
        # skip indicator (+, -, *)
        i+=1
        # skip optional whitespace
        while s[i,1] =~ /\s/; i+=1 end
        return i
      when :olist
        i=0;
        # skip whitespace
        while s[i,1] =~ /\s/; i+=1 end
        # skip digits
        while s[i,1] =~ /\d/; i+=1 end
        # skip dot
        i+=1
        # skip whitespace
        while s[i,1] =~ /\s/; i+=1 end
        return i
      else
        tell_user "BUG (my bad): '#{s}' is not a list"
        0
      end
    end

    # Counts the number of leading '#' in the string.
    # The last character of the string is never counted.
    def num_leading_hashes(s)
      i=0;
      while i<(s.size-1) && (s[i,1]=='#'); i+=1 end
      i
    end

    # Strips initial and final hashes (and the whitespace around them).
    def strip_hashes(s)
      s = s[num_leading_hashes(s), s.size]
      i = s.size-1
      while i > 0 && (s[i,1] =~ /(#|\s)/); i-=1; end
      s[0, i+1].strip
    end

    # Removes the initial quote marker ('>' plus one optional whitespace
    # character) from every line.
    def unquote(s)
      s.gsub(/^>\s?/,'')
    end

    # Removes at most +n+ columns of leading indentation from +s+.
    # A space counts for one column, a tab for `TabSize`.
    def strip_indent(s, n)
      i = 0
      while i < s.size && n>0
        c = s[i,1]
        if c == ' '
          n-=1;
        elsif c == "\t"
          n-=TabSize;
        else
          break
        end
        i+=1
      end
      # Everything from the first kept character to the end. A
      # (start, length) slice of s.size-1 would lose the last character
      # of a line that has no indentation at all.
      s[i..-1]
    end

    # Debugging aid: prints each item of +a+ with a 1-based counter.
    def dbg_describe_ary(a, prefix='')
      i = 0
      a.each do |l|
        puts "#{prefix} (#{i+=1})# #{l.inspect}"
      end
    end

    # Tells whether the line +l+ ends with the trailing-space marker that
    # requests a forced line break.
    # NOTE(review): Markdown conventionally asks for two trailing spaces;
    # this pattern accepts a single one -- confirm this is intended.
    def force_linebreak?(l)
      l =~ / $/
    end
  end
end

165
lib/maruku/structures.rb Normal file
View File

@ -0,0 +1,165 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
# Extensions to Module providing attribute accessors with an optional
# run-time class check on assignment.
class Module
# Defines a reader for +symbol+ and a writer that raises a RuntimeError
# when the assigned value is not a kind_of? +klass+.
#
# The writer is generated as source text and compiled with module_eval.
# In the heredoc below, #{...} is interpolated right away (attribute name
# and class), while \#{...} is kept for the generated method, so it is
# evaluated on each failed assignment.
def safe_attr_accessor1(symbol, klass)
attr_reader symbol
code = <<-EOF
def #{symbol}=(val)
if not val.kind_of? #{klass}
s = "\nCould not assign an object of type \#{val.class} to #{symbol}.\n\n"
s += "Tried to assign object of class \#{val.class}:\n"+
"\#{val.inspect}\n"+
"to \#{self.class}::#{symbol} constrained to be of class #{klass}.\n"
raise s
end
@#{symbol} = val
end
EOF
module_eval code
end
# Unchecked variant: a plain attr_accessor; +klass+ is ignored.
def safe_attr_accessor2(symbol, klass)
attr_accessor symbol
end
# The unchecked variant is the one currently in effect.
alias safe_attr_accessor safe_attr_accessor2
end
module MaRuKu
# I did not want to have a class for each possible element.
# Instead I opted to have only the class "MDElement"
# that represents everything in the document (paragraphs, headers, etc).
#
# You can tell what it is by the variable `node_type`.
#
# In the instance-variable `children` there are the children. These
# can be of class 1) String or 2) MDElement.
#
# The @doc variable points to the document to which the MDElement
# belongs (which is an instance of Maruku, subclass of MDElement).
#
# Attributes are contained in the hash `attributes`.
# Keys are symbols (downcased, with spaces substituted by underscores)
#
# For example, if you write in the source document:
#
#     Title: test document
#     My property: value
#
#     content content
#
# You can access `value` by writing:
#
#     @doc.attributes[:my_property] # => 'value'
#
# from whichever MDElement in the hierarchy.
#
class MDElement
# See helpers.rb for the list of allowed #node_type values
safe_attr_accessor :node_type, Symbol
# Children are either Strings or MDElement
safe_attr_accessor :children, Array
# An attribute list, may not be nil
safe_attr_accessor :al, Array #Maruku::AttributeList
# These are the processed attributes
safe_attr_accessor :attributes, Hash
# Reference of the document (which is of class Maruku)
attr_accessor :doc
# Creates an element.
#
# node_type:: Symbol identifying the kind of element
# children::  Array of Strings and/or MDElements
# meta::      Hash of extra data; for every key a reader and a writer are
#             defined on this very object (not on the class) and the
#             value is assigned through that writer
# al::        attribute list; a new AttributeList is used when nil
def initialize(node_type=:unset, children=[], meta={},
al=MaRuKu::AttributeList.new )
super();
self.children = children
self.node_type = node_type
@attributes = {}
meta.each do |symbol, value|
# Accessors are generated as source text, so each meta key must be
# usable as a method name.
self.instance_eval "
def #{symbol}; @#{symbol}; end
def #{symbol}=(val); @#{symbol}=val; end"
self.send "#{symbol}=", value
end
self.al = al || AttributeList.new
# Keep the original meta hash: it is used by == below.
self.meta_priv = meta
end
# The meta hash this element was created with.
attr_accessor :meta_priv
# Two elements are equal when node type, meta hash and children are
# equal; `attributes` and `al` do not take part in the comparison.
def ==(o)
ok = o.kind_of?(MDElement) &&
(self.node_type == o.node_type) &&
(self.meta_priv == o.meta_priv) &&
(self.children == o.children)
if not ok
# puts "This:\n"+self.inspect+"\nis different from\n"+o.inspect+"\n\n"
end
ok
end
end
# The whole document: the root node, which also owns the document-wide
# lookup tables.
class MDDocument
  safe_attr_accessor :refs, Hash
  safe_attr_accessor :footnotes, Hash
  # This is a hash. The key might be nil.
  safe_attr_accessor :abbreviations, Hash
  # Attribute list definitions
  safe_attr_accessor :ald, Hash
  # The order in which footnotes are used. Contains the id.
  safe_attr_accessor :footnotes_order, Array
  safe_attr_accessor :latex_required_packages, Array

  # Creates an empty document and, when +s+ is given, parses it.
  def initialize(s=nil)
    super(:document)
    @doc = self
    # Every table starts out empty, each with its own container.
    %w(refs footnotes abbreviations ald).each do |table|
      send("#{table}=", {})
    end
    %w(footnotes_order latex_required_packages).each do |list|
      send("#{list}=", [])
    end
    parse_doc(s) if s
  end
end
end # MaRuKu

View File

@ -0,0 +1,87 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
class String
  # Same result as #inspect. The two arguments are accepted only so that
  # the call has the same shape as Array#inspect_more; they are ignored.
  def inspect_more(a=nil,b=nil)
    self.inspect
  end
end
class Object
  # Default for any object: plain #inspect, both arguments ignored.
  def inspect_more(a=nil,b=nil)
    self.inspect
  end
end
class Array
  # Inspects an array of Strings and MaRuKu::MDElements.
  #
  # compact::      forwarded to MDElement#inspect
  # join_string::  separator between the inspected items
  # add_brackets:: wrap the result in "[...]" when true
  #
  # Raises a RuntimeError for an item of any other class.
  def inspect_more(compact, join_string, add_brackets=true)
    parts = map do |item|
      case item
      when String then item.inspect
      when MaRuKu::MDElement then item.inspect(compact)
      else raise "WTF #{item.class} #{item.inspect}"
      end
    end
    joined = parts.join(join_string)
    return joined unless add_brackets
    "[#{joined}]"
  end
end
class Hash
  # Like #inspect, but with the entries sorted by key name so that the
  # output does not depend on insertion order. Keys are looked up again
  # as symbols, so the hash is expected to be keyed by symbols.
  # Both arguments are ignored.
  def inspect_ordered(a=nil,b=nil)
    ordered = keys.map { |key| key.to_s }.sort.map { |name| name.to_sym }
    entries = ordered.map { |key| key.inspect + "=>" + self[key].inspect }
    "{" + entries.join(',') + "}"
  end
end
module MaRuKu
  class MDElement
    # Returns a Ruby-like description of the element.
    #
    # In compact mode the result of +inspect2+ is used whenever it
    # returns something; otherwise the generic
    # "md_el(:type,children,meta,al)" form is produced.
    def inspect(compact=true)
      if compact
        short_form = inspect2
        return short_form if short_form
      end
      format("md_el(:%s,%s,%s,%s)",
        node_type,
        children_inspect(compact),
        @meta_priv.inspect_ordered,
        al.inspect)
    end

    # Describes the children: "[]" when there are none, a single line when
    # it is shorter than 70 characters, one indented child per line
    # otherwise.
    def children_inspect(compact=true)
      one_line = @children.inspect_more(compact,', ')
      return "[]" if @children.empty?
      return one_line if one_line.size < 70
      "[\n" + add_tabs(@children.inspect_more(compact,",\n",false)) + "\n]"
    end
  end
end

View File

@ -0,0 +1,61 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu
  class MDElement
    # Walks the element children depth-first and calls the block with each
    # one whose node_type is +e_node_type+ (with every element when
    # +e_node_type+ is nil). A parent is yielded before its descendants;
    # String children are skipped.
    def each_element(e_node_type=nil, &block)
      @children.each do |child|
        next unless child.kind_of? MDElement
        wanted = !e_node_type || (e_node_type == child.node_type)
        block.call child if wanted
        child.each_element(e_node_type, &block)
      end
    end

    # Replaces every String in the hierarchy with the result of the block.
    #
    # Nested elements are processed first. The block may return a single
    # object or an Array; an Array is spliced into the list of children.
    def replace_each_string(&block)
      @children.each do |child|
        child.replace_each_string(&block) if child.kind_of? MDElement
      end

      rebuilt = []
      # The old list is consumed from the front while the new one is built.
      until @children.empty?
        child = @children.shift
        if child.kind_of? String
          rebuilt.concat([*block.call(child)])
        else
          rebuilt << child
        end
      end
      @children = rebuilt
    end
  end
end

View File

@ -0,0 +1,82 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
# Benchmark: times parsing and rendering of the document read from
# standard input with Maruku (HTML and LaTeX) and BlueCloth (HTML).
#
# The optional first command-line argument is the number of repetitions;
# it is ignored when it does not parse to a non-zero integer.
require 'maruku'
require 'bluecloth'

data = $stdin.read

num = 10
requested = ARGV.size > 0 ? ARGV[0].to_i : 0
num = requested if requested != 0

methods =
  [
    [Maruku, :to_html],
    [BlueCloth, :to_html],
    [Maruku, :to_latex]
  ]
#methods = [[Maruku, :class]]
#num = 10

stats = methods.map do |c, method|
  puts "Computing for #{c}"

  # Parsing: build the document +num+ times, keep the last one.
  doc = nil
  start = Time.now
  1.upto(num) do |i|
    $stdout.write "#{i} "; $stdout.flush
    doc = c.new(data)
  end
  parsing = (Time.now - start) / num

  # Rendering: convert that document +num+ times.
  start = Time.now
  1.upto(num) do |i|
    $stdout.write "#{i} "; $stdout.flush
    doc.send method
  end
  rendering = (Time.now - start) / num

  puts(("%s (%s): parsing %0.2f sec + rendering %0.2f sec "+
    "= %0.2f sec ") % [c, method, parsing, rendering, parsing + rendering])
  [c, method, parsing, rendering]
end

puts "\n\n\n"

# Append the total time to each row, then list the rows slowest first,
# each with its speed relative to the slowest.
stats.each { |row| row.push(row[2] + row[3]) }
max = stats.map { |row| row[4] }.max
stats.sort! { |x, y| x[4] <=> y[4] }
stats.reverse!

stats.each do |c, method, parsing, rendering, tot|
  puts(("%20s: parsing %0.2f sec + rendering %0.2f sec "+
    "= %0.2f sec (%0.2fx)") %
    ["#{c} (#{method})", parsing, rendering, tot, max / tot])
end

View File

@ -0,0 +1,359 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'maruku'
module MaRuKu; module Tests
# 5 accented letters in italian, encoded as UTF-8
AccIta8 = "\303\240\303\250\303\254\303\262\303\271"
# Same letters, written in ISO-8859-1 (one byte per letter)
AccIta1 = "\340\350\354\362\371"
# The word MA-RU-KU, written in katakana using UTF-8
Maruku8 = "\343\203\236\343\203\253\343\202\257"
# Runs the span-level parser over a table of cases and prints one status
# line per case.
#
# Each case is [input, expected, comment, ...]. +expected+ is either the
# list of parsed pieces or :throw when the parser must raise. A case
# that omits +expected+ reuses the one of the previous case; a case that
# omits +comment+ gets the previous comment followed by a counter.
#
# verbose::              also print the details of crashed/failed cases
# break_on_first_error:: stop at the first problem: a wrong result ends
#                        the loop, an unexpected exception is re-raised
# quiet::                do not print the lines of passing cases
def test_span_parser(verbose, break_on_first_error, quiet)
  good_cases = [
    ["", [], 'Empty string gives empty list'],
    ["a", ["a"], 'Easy char'],
    [" a", ["a"], 'First space in the paragraph is ignored'],
    ["a\n \n", ["a"], 'Last spaces in the paragraphs are ignored'],
    [' ', [], 'One char => nothing'],
    [' ', [], 'Two chars => nothing'],
    ['a b', ['a b'], 'Spaces are compressed'],
    ['a b', ['a b'], 'Newlines are spaces'],
    ["a\nb", ['a b'], 'Newlines are spaces'],
    ["a\n b", ['a b'], 'Compress newlines 1'],
    ["a \nb", ['a b'], 'Compress newlines 2'],
    [" \nb", ['b'], 'Compress newlines 3'],
    ["\nb", ['b'], 'Compress newlines 4'],
    ["b\n", ['b'], 'Compress newlines 5'],
    ["\n", [], 'Compress newlines 6'],
    ["\n\n\n", [], 'Compress newlines 7'],
    [nil, :throw, "Should throw on nil input"],
    # Code blocks
    ["`" , :throw, 'Unclosed single ticks'],
    ["``" , :throw, 'Unclosed double ticks'],
    ["`a`" , [md_code('a')], 'Simple inline code'],
    ["`` ` ``" , [md_code('`')], ],
    ["`` \\` ``" , [md_code('\\`')], ],
    ["``a``" , [md_code('a')], ],
    ["`` a ``" , [md_code('a')], ],
    # Newlines
    ["a \n", ['a',md_el(:linebreak)], 'Two spaces give br.'],
    ["a \n", ['a'], 'Newlines 2'],
    [" \n", [md_el(:linebreak)], 'Newlines 3'],
    [" \n \n", [md_el(:linebreak),md_el(:linebreak)],'Newlines 3'],
    [" \na \n", [md_el(:linebreak),'a',md_el(:linebreak)],'Newlines 3'],
    # Inline HTML
    ["a < b", ['a < b'], '< can be on itself'],
    ["<hr>", [md_html('<hr />')], 'HR will be sanitized'],
    ["<hr/>", [md_html('<hr />')], 'Closed tag is ok'],
    ["<hr />", [md_html('<hr />')], 'Closed tag is ok 2'],
    ["<hr/>a", [md_html('<hr />'),'a'], 'Closed tag is ok 2'],
    ["<em></em>a", [md_html('<em></em>'),'a'], 'Inline HTML 1'],
    ["<em>e</em>a", [md_html('<em>e</em>'),'a'], 'Inline HTML 2'],
    ["a<em>e</em>b", ['a',md_html('<em>e</em>'),'b'], 'Inline HTML 3'],
    ["<em>e</em>a<em>f</em>",
      [md_html('<em>e</em>'),'a',md_html('<em>f</em>')],
      'Inline HTML 4'],
    ["<em>e</em><em>f</em>a",
      [md_html('<em>e</em>'),md_html('<em>f</em>'),'a'],
      'Inline HTML 5'],
    ["<img src='a' />", [md_html("<img src='a' />")], 'Attributes'],
    ["<img src='a'/>"],
    # emphasis
    ["**", :throw, 'Unclosed double **'],
    ["\\*", ['*'], 'Escaping of *'],
    ["a *b* ", ['a ', md_em('b')], 'Emphasis 1'],
    ["a *b*", ['a ', md_em('b')], 'Emphasis 2'],
    ["a * b", ['a * b'], 'Emphasis 3'],
    ["a * b*", :throw, 'Unclosed emphasis'],
    # same with underscore
    ["__", :throw, 'Unclosed double __'],
    ["\\_", ['_'], 'Escaping of _'],
    ["a _b_ ", ['a ', md_em('b')], 'Emphasis 4'],
    ["a _b_", ['a ', md_em('b')], 'Emphasis 5'],
    ["a _ b", ['a _ b'], 'Emphasis 6'],
    ["a _ b_", :throw, 'Unclosed emphasis'],
    ["_b_", [md_em('b')], 'Emphasis 7'],
    ["_b_ _c_", [md_em('b'),' ',md_em('c')], 'Emphasis 8'],
    ["_b__c_", [md_em('b'),md_em('c')], 'Emphasis 9'],
    # strong
    ["**a*", :throw, 'Unclosed double ** 2'],
    ["\\**a*", ['*', md_em('a')], 'Escaping of *'],
    ["a **b** ", ['a ', md_strong('b')], 'Emphasis 1'],
    ["a **b**", ['a ', md_strong('b')], 'Emphasis 2'],
    ["a ** b", ['a ** b'], 'Emphasis 3'],
    ["a ** b**", :throw, 'Unclosed emphasis'],
    ["**b****c**", [md_strong('b'),md_strong('c')], 'Emphasis 9'],
    # strong (with underscore)
    ["__a_", :throw, 'Unclosed double __ 2'],
    ["\\__a_", ['_', md_em('a')], 'Escaping of _'],
    ["a __b__ ", ['a ', md_strong('b')], 'Emphasis 1'],
    ["a __b__", ['a ', md_strong('b')], 'Emphasis 2'],
    ["a __ b", ['a __ b'], 'Emphasis 3'],
    ["a __ b__", :throw, 'Unclosed emphasis'],
    ["__b____c__", [md_strong('b'),md_strong('c')], 'Emphasis 9'],
    # extra strong
    ["***a**", :throw, 'Unclosed triple *** '],
    ["\\***a**", ['*', md_strong('a')], 'Escaping of *'],
    ["a ***b*** ", ['a ', md_emstrong('b')], 'Strong elements'],
    ["a ***b***", ['a ', md_emstrong('b')]],
    ["a *** b", ['a *** b']],
    ["a ** * b", ['a ** * b']],
    ["***b******c***", [md_emstrong('b'),md_emstrong('c')]],
    ["a *** b***", :throw, 'Unclosed emphasis'],
    # same with underscores
    ["___a__", :throw, 'Unclosed triple *** '],
    ["\\___a__", ['_', md_strong('a')], 'Escaping of *'],
    ["a ___b___ ", ['a ', md_emstrong('b')], 'Strong elements'],
    ["a ___b___", ['a ', md_emstrong('b')]],
    ["a ___ b", ['a ___ b']],
    ["a __ _ b", ['a __ _ b']],
    ["___b______c___", [md_emstrong('b'),md_emstrong('c')]],
    ["a ___ b___", :throw, 'Unclosed emphasis'],
    # mixing is bad
    ["*a_", :throw, 'Mixing is bad'],
    ["_a*", :throw],
    ["**a__", :throw],
    ["__a**", :throw],
    ["___a***", :throw],
    ["***a___", :throw],
    # links of the form [text][ref]
    ["\\[a]", ["[a]"], 'Escaping 1'],
    ["\\[a\\]", ["[a]"], 'Escaping 2'],
    ["[a]", ["a"], 'Not a link'],
    ["[a][]", [ md_link(["a"],'')], 'Empty link'],
    ["[a][]b", [ md_link(["a"],''),'b'], 'Empty link'],
    ["[a\\]][]", [ md_link(["a]"],'')], 'Escape inside link'],
    ["[a", :throw, 'Link not closed'],
    ["[a][", :throw, 'Ref not closed'],
    # links of the form [text](url)
    ["\\[a](b)", ["[a](b)"], 'Links'],
    ["[a](url)c", [md_im_link(['a'],'url'),'c'], 'url'],
    ["[a]( url )c" ],
    ["[a] ( url )c" ],
    ["[a] ( url)c" ],
    ["[a](ur:/l/ 'Title')", [md_im_link(['a'],'ur:/l/','Title')],
      'url and title'],
    ["[a] ( ur:/l/ \"Title\")" ],
    ["[a] ( ur:/l/ \"Title\")" ],
    ["[a]( ur:/l/ Title)", :throw, "Must quote title" ],
    ["[a](url 'Tit\\\"l\\\\e')", [md_im_link(['a'],'url','Tit"l\\e')],
      'url and title escaped'],
    ["[a] ( url \"Tit\\\"l\\\\e\")" ],
    ["[a] ( url \"Tit\\\"l\\\\e\" )" ],
    ['[a] ( url "Tit\\"l\\\\e" )' ],
    ["[a]()", [md_im_link(['a'],'')], 'No URL is OK'],
    ["[a](\"Title\")", :throw, "No url specified" ],
    ["[a](url \"Title)", :throw, "Unclosed quotes" ],
    ["[a](url \"Title\\\")", :throw],
    ["[a](url \"Title\" ", :throw],
    ["[a](url \'Title\")", :throw, "Mixing is bad" ],
    ["[a](url \"Title\')"],
    ["[a](/url)", [md_im_link(['a'],'/url')], 'Funny chars in url'],
    ["[a](#url)", [md_im_link(['a'],'#url')]],
    ["[a](</script?foo=1&bar=2>)", [md_im_link(['a'],'/script?foo=1&bar=2')]],
    # Images
    ["\\![a](url)", ['!', md_im_link(['a'],'url') ], 'Escaping images'],
    ["![a](url)", [md_im_image(['a'],'url')], 'Image no title'],
    ["![a]( url )" ],
    ["![a] ( url )" ],
    ["![a] ( url)" ],
    ["![a](url 'ti\"tle')", [md_im_image(['a'],'url','ti"tle')], 'Image with title'],
    ['![a]( url "ti\\"tle")' ],
    ["![a](url", :throw, 'Invalid images'],
    ["![a( url )" ],
    ["![a] ('url )" ],
    ["![a][imref]", [md_image(['a'],'imref')], 'Image with ref'],
    ["![a][ imref]"],
    ["![a][ imref ]"],
    ["![a][\timref\t]"],
    ['<http://example.com/?foo=1&bar=2>',
      [md_url('http://example.com/?foo=1&bar=2')], 'Immediate link'],
    ['a<http://example.com/?foo=1&bar=2>b',
      ['a',md_url('http://example.com/?foo=1&bar=2'),'b'] ],
    ['<andrea@censi.org>',
      [md_email('andrea@censi.org')], 'Email address'],
    ['<mailto:andrea@censi.org>'],
    ["Developmen <http://rubyforge.org/projects/maruku/>",
      ["Developmen ", md_url("http://rubyforge.org/projects/maruku/")]],
    ["a<!-- -->b", ['a',md_html('<!-- -->'),'b'],
      'HTML Comment'],
    ["a<!--", :throw, 'Bad HTML Comment'],
    ["a<!-- ", :throw, 'Bad HTML Comment'],
    ["<?xml <?!--!`3 ?>", [md_xml_instr('xml','<?!--!`3')], 'XML processing instruction'],
    ["<? <?!--!`3 ?>", [md_xml_instr('','<?!--!`3')] ],
    ["<? ", :throw, 'Bad Server directive'],
    ["a <b", :throw, 'Bad HTML 1'],
    ["<b", :throw, 'Bad HTML 2'],
    ["<b!", :throw, 'Bad HTML 3'],
    ['`<div>`, `<table>`, `<pre>`, `<p>`',
      [md_code('<div>'),', ',md_code('<table>'),', ',
      md_code('<pre>'),', ',md_code('<p>')],
      'Multiple HTLM tags'],
    ["&andrea", ["&andrea"], 'Parsing of entities'],
    # no escaping is allowed
    # ["\\&andrea;", ["&andrea;"]],
    ["l&andrea;", ["l", md_entity('andrea')] ],
    ["&&andrea;", ["&", md_entity('andrea')] ],
    ["&123;;&amp;",[md_entity('123'),';',md_entity('amp')]],
    ["a\nThe [syntax page] [s] provides",
      ['a The ', md_link(['syntax page'],'s'), ' provides'], 'Regression'],
    ['![a](url "ti"tle")', [md_im_image(['a'],'url','ti"tle')],
      "Image with quotes"],
    ['![a](url \'ti"tle\')' ],
    ['[bar](/url/ "Title with "quotes" inside")',
      [md_im_link(["bar"],'/url/', 'Title with "quotes" inside')],
      "Link with quotes"],
    ['$20,000 and $30,000', ['$20,000 and $30,000'], 'Math: spaces'],
    ['$20,000$', [md_inline_math('20,000')]],
    ['$ 20,000$', ['$ 20,000$']],
    ['$20,000 $ $20,000$', ['$20,000 $ ', md_inline_math('20,000')]],
    ["#{Maruku8}", [Maruku8], "Reading UTF-8"],
    ["#{AccIta1}", [AccIta8], "Converting ISO-8859-1 to UTF-8",
      {:encoding => 'iso-8859-1'}],
  ]
  good_cases = unit_tests_for_attribute_lists + good_cases

  # Fill in the expected value and the comment of the abbreviated cases.
  count = 1; last_comment=""; last_expected=:throw
  good_cases.each do |t|
    if not t[1]
      t[1] = last_expected
    else
      last_expected = t[1]
    end
    if not t[2]
      t[2] = last_comment + " #{count+=1}"
    else
      last_comment = t[2]; count=1
    end
  end

  @verbose = verbose
  m = Maruku.new
  m.attributes[:on_error] = :raise
  Globals[:debug_keep_ials] = true

  good_cases.each do |input, expected, comment|
    output = nil
    begin
      output = m.parse_span_better(input)
      #lines = Maruku.split_lines input
      #output = m.parse_lines_as_span(lines)
    rescue Exception => e
      if not expected == :throw
        ex = e.inspect+ "\n"+ e.backtrace.join("\n")
        s = comment+describe_difference(input, expected, output)
        print_status(comment,'CRASHED :-(', ex+s)
        # The flag is the method parameter; no instance variable with
        # this name is ever assigned.
        raise e if break_on_first_error
      else
        quiet || print_status(comment,'OK')
      end
    end
    if not expected == :throw
      if not (expected == output)
        s = comment+describe_difference(input, expected, output)
        print_status(comment, 'FAILED', s)
        break if break_on_first_error
      else
        quiet || print_status(comment, 'OK')
      end
    else # I expected a raise
      if output
        s = comment+describe_difference(input, expected, output)
        print_status(comment, 'FAILED (no throw)', s)
        break if break_on_first_error
      end
    end
  end
end
# Width to which the comment column of a status line is padded.
PAD=40

# Prints one status line, "- <comment> <status>", with the comment padded
# with spaces up to PAD characters (longer comments are left as they are).
# When @verbose is set and +verbose_text+ is given, the text is printed
# on the following lines.
def print_status(comment, status, verbose_text=nil)
  puts "- #{comment.ljust(PAD)} #{status}"
  puts verbose_text if @verbose && verbose_text
end
# Builds the three-section report (input, expected value, actual output,
# each shown through #inspect) printed for a crashed or failed case.
def describe_difference(input, expected, output)
  [
    "\nInput:\n #{input.inspect}",
    "\nExpected:\n #{expected.inspect}",
    "\nOutput:\n #{output.inspect}\n"
  ].join
end
end end
# Make the test helpers available on Maruku documents.
class Maruku
  include MaRuKu::Tests
end

# Command line switches, in the order the test method takes them:
# 'v' = verbose, 'b' = break on first error, 'q' = quiet.
switches = %w(v b q).map { |switch| ARGV.include? switch }
Maruku.new.test_span_parser(*switches)

136
lib/maruku/tests/tests.rb Normal file
View File

@ -0,0 +1,136 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'maruku'
class Maruku
  # Aborts the run by raising a RuntimeError describing the failure.
  #
  # test:: the source text of the failing case
  # doc::  the document parsed from it (shown inspected and as HTML)
  # s::    short description of what went wrong
  def Maruku.failed(test, doc, s)
    raise "Test failed: #{s}\n*****\n#{test}\n*****\n"+
      "#{doc.inspect}\n*****\n#{doc.to_html}"
  end

  # Parses every case of MetaTests (cases are separated by '***') and
  # checks that it yields a document made of exactly one header.
  #
  # NOTE(review): the cases look designed to all produce id 'id1',
  # classes 'class1'/'class2' and the same style string; those attribute
  # values are not checked here.
  def Maruku.metaTests
    MetaTests.split('***').each do |test|
      #puts "Test: #{test.inspect}"
      doc = Maruku.new(test)
      doc.children.size == 1 ||
        failed(test, doc, "children != 1")
      h = doc.children[0]
      h.node_type==:header ||
        failed(test, doc, "child not header")
      # puts doc.inspect
      # puts doc.to_html
    end
  end

  # Different spellings of the same attribute lists attached to a header.
MetaTests = <<EOF
# Head # {ref1 ref2 ref3}
{ref1}: id: id1; class: class1
{ref2}: class: class2
{ref3}: style: "Style is : important = for all } things"
***
# Head # {ref1 ref3 ref2}
{ref1}: id: id1; class: class1
{ref2}: class: class2
{ref3}: style: "Style is : important = for all } things"
***
# Head # {ref1 ref2 ref3}
{ref1}: id= id1; class=class1
{ref2}: class=class2
{ref3}: style="Style is : important = for all } things"
***
# Head # {ref1 ref2 ref3}
{ref1}: id=id1 class=class1
{ref2}: class=class2
{ref3}: style="Style is : important = for all } things"
***
# Head # {ref1 ref2 ref3}
{ref1}: id:id1 class:class1
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"
***
# Head # {ref1 ref2 ref3}
{ref1}: id:id1 class:class1
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"
***
# Head # {#id1 .class1 ref2 ref3}
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"
***
# Head # { #id1 .class1 ref2 ref3 }
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"
***
# Head # { id=id1 class=class1 ref2 ref3 }
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"
***
# Head # { id:id1 class="class1" class:"class2" style="Style is : important = for all } things"}
EOF
end
# Run the meta tests only when this file is the program being executed.
Maruku.metaTests if File.basename($0) == 'tests.rb'

199
lib/maruku/toc.rb Normal file
View File

@ -0,0 +1,199 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu
class MDDocument
  # The table of contents: an instance of Section (see below).
  attr_reader :toc
  attr_writer :toc
end
# One entry of the table of contents, together with its sub-sections.
class Section
  include REXML

  # a Fixnum, is == header_element.level
  attr_accessor :section_level
  # An array of fixnum, like [1,2,5] for Section 1.2.5
  attr_accessor :section_number
  # reference to header (header has h.meta[:section] to self)
  attr_accessor :header_element
  # Array of immediate children of this element
  attr_accessor :immediate_children
  # Array of Section inside this section
  attr_accessor :section_children

  def initialize
    @immediate_children = []
    @section_children = []
  end

  # Indented, one-line-per-section dump of this section and of all the
  # sections below it. A section without header prints as "Master".
  def inspect(indent=1)
    own_line =
      if @header_element
        "\_"*indent + "(#{@section_level})>\t #{@section_number.join('.')} : " +
          @header_element.children_to_s +
          " (id: '#{@header_element.attributes[:id]}')\n"
      else
        "Master\n"
      end
    @section_children.inject(own_line) do |dump, child|
      dump + child.inspect(indent+1)
    end
  end

  # Numerate this section and its children: this section gets +a+, its
  # n-th sub-section gets +a+ followed by n. The number is also stored in
  # the attributes of the header, when there is one.
  def numerate(a=[])
    self.section_number = a
    section_children.each_with_index do |child, index|
      child.numerate(a.clone.push(index+1))
    end
    header = self.header_element
    header.attributes[:section_number] = self.section_number if header
  end

  # Creates an HTML toc.
  # Call this on the root
  def to_html
    wrapper = Element.new 'div'
    wrapper.attributes['class'] = 'maruku_toc'
    wrapper << create_toc
    wrapper
  end

  # Builds the (nested) list of links to the sub-sections.
  def create_toc
    list = Element.new 'ul'
    # let's remove the bullets
    list.attributes['style'] = 'list-style: none;'
    @section_children.each do |child|
      item = Element.new 'li'
      number_span = child.header_element.render_section_number
      item << number_span if number_span
      link = child.header_element.wrap_as_element('a')
      link.delete_attribute 'id'
      link.attributes['href'] = "##{child.header_element.attributes[:id]}"
      item << link
      item << child.create_toc if child.section_children.size>0
      list << item
    end
    list
  end

  # Creates a latex toc.
  # Call this on the root
  def to_latex
    to_latex_rec + "\n\n"
  end

  # One line per sub-section (recursively): optional number, a hyperlink
  # to the header and the page reference.
  def to_latex_rec
    latex = ""
    @section_children.each do |child|
      header = child.header_element
      latex += "\\noindent"
      number = header.section_number
      latex += number if number
      text = header.children_to_latex
      id = header.attributes[:id]
      latex += "\\hyperlink{#{id}}{#{text}}"
      latex += "\\dotfill \\pageref*{#{id}} \\linebreak\n"
      latex += child.to_latex_rec if child.section_children.size>0
    end
    latex
  end
end
class MDDocument
  # Builds the tree of sections out of the headers found among the
  # top-level children and returns its root.
  #
  # Every header in the document first gets an id (generated when it has
  # none). Children that are not headers become the immediate children of
  # the section open at that point. When the whole document hangs under a
  # single top-level section, that section is returned as the root.
  # Sections are numbered before returning.
  def create_toc
    each_element(:header) do |hdr|
      hdr.attributes[:id] ||= hdr.generate_id
    end

    # the ancestor section
    root = Section.new
    root.section_level = 0
    # Sections currently open, innermost last.
    open_sections = [root]

    pos = 0
    while pos < @children.size
      # Hand everything up to the next relevant header to the innermost
      # open section. A header more than one level deeper than that
      # section is treated like ordinary content.
      while pos < @children.size
        node = @children[pos]
        break if node.node_type == :header &&
          node.level <= open_sections.last.section_level+1
        open_sections.last.immediate_children.push node
        pos += 1
      end
      break if pos >= @children.size

      header = @children[pos]
      if header.level > open_sections.last.section_level
        # this level is inside
        section = Section.new
        section.section_level = header.level
        section.header_element = header
        header.instance_variable_set :@section, section
        open_sections.last.section_children.push section
        open_sections.push section
        pos += 1
      else
        # A sibling or a parent level: close the innermost section and
        # look at the same header again.
        open_sections.pop
      end
    end

    # If there is only one big header, then assume
    # it is the master
    top = root.section_children.size == 1 ? root.section_children.first : root
    # Assign section numbers
    top.numerate
    top
  end
end
end

View File

@ -0,0 +1,33 @@
# Smoke test for the :on_error option: a well-formed document converts,
# a malformed one raises with :raise and only warns with :warning.
require 'maruku'

well_formed = <<SOURCE
Chapter 1
=========
It was a stormy and rainy night.
SOURCE

malformed = <<SOURCE
This is a [bad link.
SOURCE

Maruku.new(well_formed).to_html

# Parser messages are collected here instead of being printed.
error_log = ""

begin
  Maruku.new(malformed, {:on_error => :raise, :error_stream => error_log})
  puts "Error! It should have thrown an exception."
rescue
  # puts "ok, got error"
end

begin
  Maruku.new(malformed, {:on_error => :warning, :error_stream => error_log})
rescue
  puts "Error! It should not have thrown an exception."
end

38
lib/maruku/version.rb Normal file
View File

@ -0,0 +1,38 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
# Version information and feature switches for Maruku. The two predicate
# methods are plain instance methods, so they become available to whatever
# includes or extends this module.
module MaRuKu
  Version = '0.4.2.1'
  MarukuURL = 'http://maruku.rubyforge.org/'

  # Whether the PHP Markdown Extra syntax is also used.
  #
  # Note: a false value would not guarantee that no special features
  # are used.
  def markdown_extra?; true; end

  # Feature switch for the new meta-data handling; always true here.
  def new_meta_data?; true; end
end

38
lib/syntax.rb Normal file
View File

@ -0,0 +1,38 @@
require 'syntax/common'
module Syntax
  # Fallback tokenizer used for every syntax that has no dedicated
  # implementation: the whole input is handed back as one :normal token.
  class Default
    # Yields +text+ to the block wrapped in a single Token.
    def tokenize( text )
      yield Token.new( text, :normal )
    end
  end

  # Registry of syntax implementations, keyed by syntax name. Unknown names
  # resolve to the Default tokenizer class.
  SYNTAX = Hash.new( Default )

  # Tries to require "syntax/lang/<syntax>" and returns a new instance of
  # whatever class is registered under that name afterwards. A LoadError
  # from the require is ignored, in which case the registry's default
  # (normally the Default tokenizer) is instantiated.
  def load( syntax )
    handler = begin
      require "syntax/lang/#{syntax}"
      SYNTAX[ syntax ]
    rescue LoadError
      SYNTAX[ syntax ]
    end
    handler.new
  end

  # Returns the names of the syntaxes shipped in the "syntax/lang" directory
  # next to this file (one name per *.rb file, extension removed).
  def all
    pattern = File.join( File.dirname(__FILE__), "syntax", "lang", "*.rb" )
    Dir.glob( pattern ).map { |path| File.basename(path, ".rb") }
  end

  module_function :load, :all
end

163
lib/syntax/common.rb Normal file
View File

@ -0,0 +1,163 @@
require 'strscan'
module Syntax
  # One lexeme produced by a tokenizer. A Token *is* the lexeme text (it
  # subclasses String) and additionally remembers which group it belongs to
  # and whether it opens or closes a region.
  class Token < String
    # +group+ is the type of the lexeme; +instruction+ is one of :none,
    # :region_open or :region_close.
    attr_reader :group, :instruction

    # Builds a token for +text+ belonging to +group+.
    def initialize( text, group, instruction = :none )
      super( text )
      @group, @instruction = group, instruction
    end
  end

  # Common machinery for all tokenizers: owns the StringScanner, drives the
  # #step loop and merges adjacent text of the same group into one token
  # before handing it to the client block.
  class Tokenizer
    # +group+ is the group currently being accumulated, +chunk+ the text
    # collected for it so far.
    attr_reader :group, :chunk

    # Prepares for a tokenization run over +text+. The block is stored and
    # later called once per token. Returns whatever #setup returns.
    def start( text, &block )
      @text = StringScanner.new( text )
      @callback = block
      @group = :normal
      @chunk = ""
      setup
    end

    # Hook for subclass-specific initialisation; does nothing by default.
    def setup
    end

    # Ends a run: pending text is flushed to the client, then #teardown is
    # invoked.
    def finish
      start_group( nil )
      teardown
    end

    # Hook for subclass-specific cleanup; does nothing by default.
    def teardown
    end

    # One iteration of the tokenization loop. Must be supplied by
    # subclasses and may emit any number of tokens.
    def step
      raise NotImplementedError, "subclasses must implement #step"
    end

    # Tokenizes +text+: calls #step repeatedly until the scanner reaches the
    # end of the input, passing every token to the block.
    def tokenize( text, &block )
      start( text, &block )
      until @text.eos?
        step
      end
      finish
    end

    # Merges +opts+ into this tokenizer's options. Which options exist is up
    # to the individual tokenizer.
    def set( opts={} )
      @options ||= Hash.new
      @options.update( opts )
    end

    # Returns the value stored for option +opt+, or nil when no options
    # have been set.
    def option(opt)
      return nil unless @options
      @options[opt]
    end

    private

      # Zero-width match just before a line break or at the end of a line.
      EOL = /(?=\r\n?|\n|$)/

      # Defines an instance method +sym+ that forwards to the scanner.
      def self.delegate( sym )
        define_method( sym ) { |*args| @text.__send__( sym, *args ) }
      end

      [ :bol?, :eos?, :scan, :scan_until, :check, :check_until, :getch,
        :matched, :pre_match, :peek, :pos ].each { |name| delegate name }

      # Returns the n-th capture group of the scanner's latest match.
      def subgroup(n)
        @text[n]
      end

      # Adds +data+ to the chunk being accumulated.
      def append( data )
        @chunk << data
      end

      # Switches to group +gr+. When that differs from the current group the
      # accumulated chunk is first emitted as a token. A non-nil +data+ is
      # then appended to the (possibly fresh) chunk.
      def start_group( gr, data=nil )
        flush_chunk unless gr == @group
        @group = gr
        @chunk << data if data
      end

      # Emits pending text, switches to group +gr+ and sends a :region_open
      # token carrying +data+ (or an empty string) to the client.
      def start_region( gr, data=nil )
        region_marker( gr, data, :region_open )
      end

      # Emits pending text, switches to group +gr+ and sends a :region_close
      # token carrying +data+ (or an empty string) to the client.
      def end_region( gr, data=nil )
        region_marker( gr, data, :region_close )
      end

      # Shared implementation of #start_region and #end_region.
      def region_marker( gr, data, instruction )
        flush_chunk
        @group = gr
        @callback.call( Token.new( data||"", @group, instruction ) )
      end

      # Sends the accumulated chunk to the client (unless it is empty) and
      # starts a new, empty chunk.
      def flush_chunk
        unless @chunk.empty?
          @callback.call( Token.new( @chunk, @group ) )
        end
        @chunk = ""
      end

      # Tokenizes +text+ with the tokenizer for +syntax+, forwarding this
      # tokenizer's options and client block to it.
      def subtokenize( syntax, text )
        nested = Syntax.load( syntax )
        nested.set @options if @options
        flush_chunk
        nested.tokenize( text, &@callback )
      end
  end
end

View File

@ -0,0 +1,27 @@
require 'syntax'
module Syntax
  module Convertors
    # Base class shared by all convertors. It only stores the tokenizer a
    # convertor works with and offers a shortcut constructor.
    class Abstract
      class << self
        # Builds a convertor whose tokenizer is the one Syntax.load returns
        # for +syntax+.
        def for_syntax( syntax )
          new( Syntax.load( syntax ) )
        end
      end

      # Creates a convertor that uses +tokenizer+.
      def initialize( tokenizer )
        @tokenizer = tokenizer
      end

      # The tokenizer this convertor was created with.
      def tokenizer
        @tokenizer
      end
    end
  end
end

View File

@ -0,0 +1,51 @@
require 'syntax/convertors/abstract'
module Syntax
module Convertors
# A simple class for converting a text into HTML.
class HTML < Abstract
# Renders +text+ as HTML. Every token whose group differs from the
# enclosing region (or from <tt>:normal</tt> outside any region) is wrapped
# in a span whose class is the token's group; region tokens open and close
# spans of their own. When +pre+ is true the result is wrapped in pre tags.
# Regions still open at the end of the input are closed automatically.
def convert( text, pre=true )
  html = pre ? "<pre>" : ""
  open_regions = []

  @tokenizer.tokenize( text ) do |tok|
    escaped = html_escape(tok)
    if tok.instruction == :region_close
      open_regions.pop
      html << "</span>"
    elsif tok.instruction == :region_open
      open_regions.push tok.group
      html << "<span class=\"#{tok.group}\">#{escaped}"
    elsif tok.group == ( open_regions.last || :normal )
      # Same group as the surrounding context: no extra markup needed.
      html << escaped
    else
      html << "<span class=\"#{tok.group}\">#{escaped}</span>"
    end
  end

  html << "</span>" while open_regions.pop
  html << "</pre>" if pre
  html
end
private
# Returns a copy of +string+ in which &, <, > and the double quote are
# replaced by the corresponding HTML entities.
def html_escape( string )
  entities = { "&" => "&amp;", "<" => "&lt;", ">" => "&gt;", '"' => "&quot;" }
  string.gsub( /[&<>"]/ ) { |char| entities[char] }
end
end
end
end

317
lib/syntax/lang/ruby.rb Normal file
View File

@ -0,0 +1,317 @@
require 'syntax'
module Syntax
# A tokenizer for the Ruby language. It recognizes all common syntax
# (and some less common syntax) but because it is not a true lexer, it
# will make mistakes on some ambiguous cases.
class Ruby < Tokenizer
# The list of all identifiers recognized as keywords.
KEYWORDS =
%w{if then elsif else end begin do rescue ensure while for
class module def yield raise until unless and or not when
case super undef break next redo retry in return alias
defined?}
# Ruby-specific state for a new run: no pending method selector, operators
# not yet allowed (so a leading "/" starts a regex), and no heredocs waiting
# for their body.
def setup
  @selector = @allow_operator = false
  @heredocs = []
end
# Step through a single iteration of the tokenization process.
#
# Each call consumes at least one character of input and classifies it.
# Three pieces of state steer the ambiguous cases:
#
# * <tt>@selector</tt> is true right after a single ".", so that the next
#   word is treated as a method name rather than a keyword or constant.
# * <tt>@allow_operator</tt> is true after something that can be the left
#   operand of an operator; it decides whether "/" is a division sign or
#   the start of a regular expression.
# * <tt>@heredocs</tt> queues heredocs announced on the current line; their
#   bodies are scanned when the line break is reached.
def step
  case
  when bol? && check( /=begin/ )
    # A block comment that is never closed extends to the end of the
    # input. Without the fallback nothing would be consumed and the
    # tokenizer would loop forever.
    start_group( :comment,
                 scan_until( /^=end#{EOL}/ ) || scan_until( /\Z/ ) )
  when bol? && check( /__END__#{EOL}/ )
    start_group( :comment, scan_until( /\Z/ ) )
  else
    case
    when check( /def\s+/ )
      start_group :keyword, scan( /def\s+/ )
      start_group :method, scan_until( /(?=[;(\s]|#{EOL})/ )
    when check( /class\s+/ )
      start_group :keyword, scan( /class\s+/ )
      start_group :class, scan_until( /(?=[;\s<]|#{EOL})/ )
    when check( /module\s+/ )
      start_group :keyword, scan( /module\s+/ )
      start_group :module, scan_until( /(?=[;\s]|#{EOL})/ )
    when check( /::/ )
      start_group :punct, scan(/::/)
    when check( /:"/ )
      start_group :symbol, scan(/:/)
      scan_delimited_region :symbol, :symbol, "", true
      @allow_operator = true
    when check( /:'/ )
      start_group :symbol, scan(/:/)
      scan_delimited_region :symbol, :symbol, "", false
      @allow_operator = true
    when scan( /:[_a-zA-Z@$][$@\w]*[=!?]?/ )
      start_group :symbol, matched
      @allow_operator = true
    when scan( /\?(\\[^\n\r]|[^\\\n\r\s])/ )
      start_group :char, matched
      @allow_operator = true
    when check( /(__FILE__|__LINE__|true|false|nil|self)[?!]?/ )
      # With a trailing ?/! or after a "." these are ordinary method names.
      if @selector || matched[-1] == ?? || matched[-1] == ?!
        start_group :ident,
          scan(/(__FILE__|__LINE__|true|false|nil|self)[?!]?/)
      else
        start_group :constant,
          scan(/(__FILE__|__LINE__|true|false|nil|self)/)
      end
      @selector = false
      @allow_operator = true
    when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
      start_group :number, matched
      @allow_operator = true
    else
      case peek(2)
      when "%r"
        scan_delimited_region :punct, :regex, scan( /../ ), true
        @allow_operator = true
      when "%w", "%q"
        scan_delimited_region :punct, :string, scan( /../ ), false
        @allow_operator = true
      when "%s"
        scan_delimited_region :punct, :symbol, scan( /../ ), false
        @allow_operator = true
      when "%W", "%Q", "%x"
        scan_delimited_region :punct, :string, scan( /../ ), true
        @allow_operator = true
      when /%[^\sa-zA-Z0-9]/
        scan_delimited_region :punct, :string, scan( /./ ), true
        @allow_operator = true
      when "<<"
        # "<<" directly after a word character is the append/shift
        # operator, otherwise it may announce a heredoc.
        saw_word = ( chunk[-1,1] =~ /[\w!?]/ )
        start_group :punct, scan( /<</ )
        if saw_word
          @allow_operator = false
          return
        end

        float_right = scan( /-/ )
        append "-" if float_right
        if ( type = scan( /['"]/ ) )
          append type
          delim = scan_until( /(?=#{type})/ )
          if delim.nil?
            # Quoted heredoc identifier that is never closed.
            append scan_until( /\Z/ )
            return
          end
        else
          delim = scan( /\w+/ ) or return
        end
        start_group :constant, delim
        start_group :punct, scan( /#{type}/ ) if type
        # The body is scanned once the end of this line is reached.
        @heredocs << [ float_right, type, delim ]
        @allow_operator = true
      else
        case peek(1)
        when /[\n\r]/
          unless @heredocs.empty?
            scan_heredoc(*@heredocs.shift)
          else
            start_group :normal, scan( /\s+/ )
          end
          @allow_operator = false
        when /\s/
          start_group :normal, scan( /\s+/ )
        when "#"
          start_group :comment, scan( /#[^\n\r]*/ )
        when /[A-Z]/
          start_group @selector ? :ident : :constant, scan( /\w+/ )
          @allow_operator = true
        when /[a-z_]/
          word = scan( /\w+[?!]?/ )
          if !@selector && KEYWORDS.include?( word )
            start_group :keyword, word
            @allow_operator = false
          else
            # Previously a bare "elsif" that used the start_group call as
            # its condition; that only worked because the call happens to
            # return a truthy value.
            start_group :ident, word
            @allow_operator = true
          end
          @selector = false
        when /\d/
          start_group :number,
            scan( /[\d_]+(\.[\d_]+)?([eE][\d_]+)?/ )
          @allow_operator = true
        when '"'
          scan_delimited_region :punct, :string, "", true
          @allow_operator = true
        when '/'
          if @allow_operator
            start_group :punct, scan(%r{/})
            @allow_operator = false
          else
            scan_delimited_region :punct, :regex, "", true
            @allow_operator = true
          end
        when "'"
          scan_delimited_region :punct, :string, "", false
          @allow_operator = true
        when "."
          dots = scan( /\.{1,3}/ )
          start_group :punct, dots
          @selector = ( dots.length == 1 )
        when /[@]/
          start_group :attribute, scan( /@{1,2}\w*/ )
          @allow_operator = true
        when /[$]/
          start_group :global, scan(/\$/)
          start_group :global, scan( /\w+|./ ) if check(/./)
          @allow_operator = true
        when /[-!?*\/+=<>(\[\{}:;,&|%]/
          start_group :punct, scan(/./)
          @allow_operator = false
        when /[)\]]/
          start_group :punct, scan(/./)
          @allow_operator = true
        else
          # all else just falls through this, to prevent
          # infinite loops...
          append getch
        end
      end
    end
  end
end
private
# Scan a delimited region of text. This handles the simple cases (strings
# delimited with quotes) as well as the more complex cases of %-strings
# and here-documents.
#
# * +delim_group+ is the group to use to classify the delimiters of the
# region
# * +inner_group+ is the group to use to classify the contents of the
# region
# * +starter+ is the text to use as the starting delimiter
# * +exprs+ is a boolean flag indicating whether the region is an
# interpolated string or not
# * +delim+ is the text to use as the delimiter of the region. If +nil+,
# the next character will be treated as the delimiter.
# * +heredoc+ is either +false+, meaning the region is not a heredoc, or
# <tt>:flush</tt> (meaning the delimiter must be flush left), or
# <tt>:float</tt> (meaning the delimiter doesn't have to be flush left).
#
# The region is reported to the client as a :region_open token, the
# contents (with escapes and, when +exprs+ is true, interpolations picked
# out), a :region_close token and finally the closing delimiter. If the
# closing delimiter is never found, the rest of the input is emitted as
# +inner_group+ and no :region_close token is sent.
def scan_delimited_region( delim_group, inner_group, starter, exprs,
delim=nil, heredoc=false )
# No explicit delimiter: the next character opens the region and, for the
# bracket characters, determines the matching closing delimiter.
if !delim
start_group delim_group, starter
# NOTE(review): scan( /./ ) returns nil at the end of the input or in front
# of a line break; the following append would then be handed nil.
delim = scan( /./ )
append delim
delim = case delim
when '{' then '}'
when '(' then ')'
when '[' then ']'
when '<' then '>'
else delim
end
end
start_region inner_group
# Build the pattern of everything that interrupts plain contents. The
# first alternative is a single backslash.
items = "\\\\|"
if heredoc
# Heredoc terminator: the delimiter at the start of a line, optionally
# indented when the heredoc is of the :float kind.
items << "(^"
items << '\s*' if heredoc == :float
# NOTE(review): inside this double-quoted string "\s" is a literal space,
# so only trailing spaces are tolerated after the delimiter -- confirm that
# the regex class \s was not intended.
items << "#{Regexp.escape(delim)}\s*?)#{EOL}"
else
items << "#{Regexp.escape(delim)}"
end
# Interpolated regions additionally stop at #$, #@, #@@ and #{.
items << "|#(\\$|@@?|\\{)" if exprs
items = Regexp.new( items )
loop do
p = pos
match = scan_until( items )
if match.nil?
# Unterminated region: everything that is left belongs to it.
start_group inner_group, scan_until( /\Z/ )
break
else
# Plain contents between the previous position and the match.
text = pre_match[p..-1]
start_group inner_group, text if text.length > 0
# strip removes the whitespace a heredoc terminator match may carry.
case matched.strip
when "\\"
unless exprs
# Non-interpolated region: only \' and \\ are highlighted as escapes,
# any other backslash is ordinary content.
case peek(1)
when "'"
scan(/./)
start_group :escape, "\\'"
when "\\"
scan(/./)
start_group :escape, "\\\\"
else
start_group inner_group, "\\"
end
else
# Interpolated region: the backslash and the character after it form the
# escape, extended by the digits of \x and octal sequences.
start_group :escape, "\\"
# NOTE(review): getch returns nil when the backslash is the last character
# of the input; append would then be handed nil.
c = getch
append c
case c
when 'x'
append scan( /[a-fA-F0-9]{1,2}/ )
when /[0-7]/
append scan( /[0-7]{0,2}/ )
end
end
when delim
# Closing delimiter: close the region, then emit the delimiter itself.
end_region inner_group
start_group delim_group, matched
break
when /^#/
# Interpolation. With the :expressions option set to :highlight the
# expression becomes a region of its own and #{...} contents are
# tokenized as Ruby; otherwise it is emitted as one :expr chunk.
do_highlight = (option(:expressions) == :highlight)
start_region :expr if do_highlight
start_group :expr, matched
case matched[1]
when ?{
# Collect everything up to the brace that balances the opening one.
depth = 1
content = ""
while depth > 0
p = pos
c = scan_until( /[\{}]/ )
if c.nil?
content << scan_until( /\Z/ )
break
else
depth += ( matched == "{" ? 1 : -1 )
content << pre_match[p..-1]
content << matched if depth > 0
end
end
if do_highlight
subtokenize "ruby", content
start_group :expr, "}"
else
append content + "}"
end
when ?$, ?@
# NOTE(review): scan( /\w+/ ) returns nil when no word character follows
# the sigil; append would then be handed nil.
append scan( /\w+/ )
end
end_region :expr if do_highlight
else raise "unexpected match on #{matched}"
end
end
end
end
# Scans the body of a heredoc that starts at the current position.
#
# * +float+ is truthy when the terminator may be indented
# * +type+ is the quote character used around the heredoc identifier
#   (+nil+ when it was not quoted); a single quote disables interpolation
# * +delim+ is the terminator to look for
def scan_heredoc(float, type, delim)
  interpolated = ( type != "'" )
  placement = float ? :float : :flush
  scan_delimited_region( :constant, :string, "", interpolated,
                         delim, placement )
end
end
SYNTAX["ruby"] = Ruby
end

108
lib/syntax/lang/xml.rb Normal file
View File

@ -0,0 +1,108 @@
require 'syntax'
module Syntax
# A simple implementation of an XML lexer. It handles most cases. It is
# not a validating lexer, meaning it will happily process invalid XML without
# complaining.
class XML < Tokenizer
# Initialize the lexer for a new run: scanning starts outside of any tag,
# so #step treats the input as character data until it meets a "<".
def setup
@in_tag = false
end
# Step through a single iteration of the tokenization process. This will
# yield (potentially) many tokens, and possibly zero tokens.
#
# Leading whitespace is always emitted as :normal. Inside a tag
# (<tt>@in_tag</tt> is true) the input is read as attribute names, numbers,
# "=", quoted strings and the closing ">". Outside a tag, text up to the
# next "<" or "&" is :normal, followed by a comment, the start of a tag
# or an entity.
def step
  start_group :normal, matched if scan( /\s+/ )
  if @in_tag
    case
    when scan( /([-\w]+):([-\w]+)/ )
      start_group :namespace, subgroup(1)
      start_group :punct, ":"
      start_group :attribute, subgroup(2)
    when scan( /\d+/ )
      start_group :number, matched
    when scan( /[-\w]+/ )
      start_group :attribute, matched
    when scan( %r{[/?]?>} )
      @in_tag = false
      start_group :punct, matched
    when scan( /=/ )
      start_group :punct, matched
    when scan( /["']/ )
      scan_string matched
    else
      # The whitespace scan above may have consumed the last characters of
      # the input (e.g. "<a "); getch then returns nil, which must not be
      # appended to the chunk.
      char = getch
      append char if char
    end
  elsif ( text = scan_until( /(?=[<&])/ ) )
    start_group :normal, text unless text.empty?
    if scan(/<!--.*?(-->|\Z)/m)
      # An unterminated comment runs to the end of the input.
      start_group :comment, matched
    else
      case peek(1)
      when "<"
        start_group :punct, getch
        # "<?", "</" and "<!" are emitted as one punctuation token.
        case peek(1)
        when "?", "/", "!"
          append getch
        end
        start_group :normal, matched if scan( /\s+/ )
        if scan( /([-\w]+):([-\w]+)/ )
          start_group :namespace, subgroup(1)
          start_group :punct, ":"
          start_group :tag, subgroup(2)
        elsif scan( /[-\w]+/ )
          start_group :tag, matched
        end
        @in_tag = true
      when "&"
        if scan( /&\S{1,10};/ )
          start_group :entity, matched
        else
          # A lone ampersand that is not part of an entity.
          start_group :normal, scan( /&/ )
        end
      end
    end
  else
    # No further markup: the rest of the input is plain text.
    append scan_until( /\Z/ )
  end
end
private
# Scans a quoted attribute value whose opening quote +delim+ has already
# been consumed. The quotes are emitted as :punct, entities as :entity and
# everything else as :string; a backslash keeps the character after it
# inside the string. Scanning stops at the closing quote, or as soon as
# none of the special characters occurs in the remaining input.
def scan_string( delim )
  start_group :punct, delim
  stop_at = /(?=[&\\]|#{delim})/
  while ( segment = scan_until( stop_at ) )
    start_group :string, segment unless segment.empty?
    upcoming = peek(1)
    if upcoming == "&"
      if scan( /&\S{1,10};/ )
        start_group :entity, matched
      else
        # Not an entity after all: the ampersand is plain string content.
        start_group :string, getch
      end
    elsif upcoming == "\\"
      start_group :string, getch
      append( getch || "" )
    elsif upcoming == delim
      start_group :punct, getch
      break
    end
  end
end
end
SYNTAX["xml"] = XML
end

105
lib/syntax/lang/yaml.rb Normal file
View File

@ -0,0 +1,105 @@
require 'syntax'
module Syntax
# A simple implementation of a YAML lexer. It handles most cases. It is
# not a validating lexer.
class YAML < Tokenizer
# Step through a single iteration of the tokenization process. This will
# yield (potentially) many tokens, and possibly zero tokens.
#
# At the beginning of a line the input is checked for a document marker,
# a mapping key, a sequence dash, a blank line or a comment. Anywhere else
# it is checked for the inline constructs (types, anchors, references,
# times, dates, quoted strings, symbols, comments and ">" block scalars);
# whatever remains is :normal text up to the end of the line or the next
# "#".
def step
  if bol?
    case
    when scan(/---(\s*.+)?$/)
      start_group :document, matched
    when scan(/(\s*)([a-zA-Z][-\w]*)(\s*):/)
      start_group :normal, subgroup(1)
      start_group :key, subgroup(2)
      start_group :normal, subgroup(3)
      start_group :punct, ":"
    when scan(/(\s*)-/)
      start_group :normal, subgroup(1)
      start_group :punct, "-"
    when scan(/\s*$/)
      start_group :normal, matched
      # The pattern can match zero characters (an empty line followed by
      # something that is neither a key nor a dash). Consume the line break
      # in that case, otherwise the scanner would never advance.
      if matched.empty?
        char = getch
        append char if char
      end
    when scan(/#.*$/)
      start_group :comment, matched
    else
      append getch
    end
  else
    case
    when scan(/[\n\r]+/)
      start_group :normal, matched
    when scan(/[ \t]+/)
      start_group :normal, matched
    when scan(/!+(.*?^)?\S+/)
      start_group :type, matched
    when scan(/&\S+/)
      start_group :anchor, matched
    when scan(/\*\S+/)
      start_group :ref, matched
    when scan(/\d\d:\d\d:\d\d/)
      start_group :time, matched
    when scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/)
      start_group :date, matched
    when scan(/['"]/)
      start_group :punct, matched
      scan_string matched
    when scan(/:\w+/)
      start_group :symbol, matched
    when scan(/[:]/)
      start_group :punct, matched
    when scan(/#.*$/)
      start_group :comment, matched
    when scan(/>-?/)
      # Block scalar: the rest of the indicator line is :normal, the
      # following lines belong to the scalar for as long as they are
      # indented at least as far as its first line.
      start_group :punct, matched
      start_group :normal, scan(/.*$/)
      append getch until eos? || bol?
      return if eos?
      indent = check(/ */)
      start_group :string
      loop do
        # At the end of the input check_until( /\Z/ ) keeps returning an
        # empty string rather than nil, so the end has to be detected
        # explicitly or the loop never terminates.
        break if eos?
        line = check_until(/[\n\r]|\Z/)
        break if line.nil?
        if line.chomp.length > 0
          this_indent = line.chomp.match( /^\s*/ )[0]
          break if this_indent.length < indent.length
        end
        append scan_until(/[\n\r]|\Z/)
      end
    else
      start_group :normal, scan_until(/(?=$|#)/)
    end
  end
end
private
# Scans a quoted scalar whose opening quote +delim+ has already been
# consumed. The contents are emitted as :string and the closing quote as
# :punct. In double-quoted scalars a backslash and the character after it
# are emitted as :expr; in single-quoted scalars the backslash is ordinary
# content. If the closing quote is missing, the rest of the input is
# treated as part of the string.
def scan_string( delim )
  # Stop in front of the closing quote and, unless single-quoted, in front
  # of a backslash.
  regex = /(?=[#{delim=="'" ? "" : "\\\\"}#{delim}])/
  loop do
    text = scan_until( regex )
    if text.nil?
      start_group :string, scan_until( /\Z/ )
      break
    else
      start_group :string, text unless text.empty?
    end
    case peek(1)
    when "\\"
      # The backslash may be followed by a line break or be the very last
      # character of the input. The scan must still consume something,
      # otherwise this loop would never terminate.
      start_group :expr, ( scan(/../m) || getch )
    else
      start_group :punct, getch
      break
    end
  end
end
end
SYNTAX["yaml"] = YAML
end

9
lib/syntax/version.rb Normal file
View File

@ -0,0 +1,9 @@
module Syntax
  # Version number of the Syntax library, as separate components and as a
  # dotted string.
  module Version
    MAJOR = 1
    MINOR = 0
    TINY = 0

    STRING = "#{MAJOR}.#{MINOR}.#{TINY}"
  end
end

3
log/production.log Normal file
View File

@ -0,0 +1,3 @@
# Logfile created on Mon Jan 22 07:45:04 CST 2007 by logger.rb/1.5.2.7
Migrating to Beta1Schema (1)
Migrating to Beta2ChangesBulk (2)

View File

@ -317,4 +317,9 @@ div.errorExplanation p,div.errorExplanation li {
border:none;
margin:0;
padding:0;
}
}
merror {display:inline;font-size:1em;}
math[display=block] {overflow:auto;}
math { white-space: nowrap }
.maruku-eq-number {float:right}