diff --git a/app/controllers/application.rb b/app/controllers/application.rb index 59c43eb0..b4838822 100644 --- a/app/controllers/application.rb +++ b/app/controllers/application.rb @@ -48,6 +48,7 @@ class ApplicationController < ActionController::Base '.pdf' => 'application/pdf', '.png' => 'image/png', '.txt' => 'text/plain', + '.tex' => 'text/plain', '.zip' => 'application/zip' } unless defined? FILE_TYPES @@ -114,7 +115,7 @@ class ApplicationController < ActionController::Base def rescue_action_in_public(exception) render :status => 500, :text => <<-EOL - +

Internal Error

An application error occurred while processing your request.

@@ -145,8 +146,10 @@ class ApplicationController < ActionController::Base def set_content_type_header if %w(rss_with_content rss_with_headlines).include?(action_name) @response.headers['Content-Type'] = 'text/xml; charset=UTF-8' + elsif %w(tex).include?(action_name) + @response.headers['Content-Type'] = 'text/plain; charset=UTF-8' else - @response.headers['Content-Type'] = 'text/html; charset=UTF-8' + @response.headers['Content-Type'] = 'application/xhtml+xml; charset=UTF-8' end end diff --git a/app/controllers/wiki_controller.rb b/app/controllers/wiki_controller.rb index 6bf52f7c..181ed53c 100644 --- a/app/controllers/wiki_controller.rb +++ b/app/controllers/wiki_controller.rb @@ -9,7 +9,7 @@ class WikiController < ApplicationController caches_action :show, :published, :authors, :recently_revised, :list cache_sweeper :revision_sweeper - layout 'default', :except => [:rss_feed, :rss_with_content, :rss_with_headlines, :tex, :export_tex, :export_html] + layout 'default', :except => [:rss_feed, :rss_with_content, :rss_with_headlines, :tex, :pdf, :export_tex, :export_html] def index if @web_name @@ -280,8 +280,12 @@ class WikiController < ApplicationController end def tex + if @web.markup == :markdownMML + @tex_content = Maruku.new(@page.content).to_latex + else @tex_content = RedClothForTex.new(@page.content).to_tex end + end protected @@ -305,8 +309,12 @@ class WikiController < ApplicationController end def export_page_to_tex(file_path) - tex - File.open(file_path, 'w') { |f| f.write(render_to_string(:template => 'wiki/tex', :layout => false)) } + if @web.markup == :markdownMML + @tex_content = Maruku.new(@page.content).to_latex + else + @tex_content = RedClothForTex.new(@page.content).to_tex + end + File.open(file_path, 'w') { |f| f.write(render_to_string(:template => 'wiki/tex', :layout => 'tex')) } end def export_pages_as_zip(file_type, &block) @@ -396,7 +404,11 @@ class WikiController < ApplicationController def render_tex_web @web.select.by_name.inject({}) do 
|tex_web, page| + if @web.markup == :markdownMML + tex_web[page.name] = Maruku.new(page.content).to_latex + else tex_web[page.name] = RedClothForTex.new(page.content).to_tex + end tex_web end end diff --git a/app/helpers/application_helper.rb b/app/helpers/application_helper.rb index 67d7ae86..e33bc0fc 100644 --- a/app/helpers/application_helper.rb +++ b/app/helpers/application_helper.rb @@ -23,7 +23,7 @@ module ApplicationHelper if element.last != selected options << "" else - options << "" + options << "" end else options << ((element != selected) ? "" : "") diff --git a/app/views/admin/create_system.rhtml b/app/views/admin/create_system.rhtml index f5d5ff7f..0ef6175c 100644 --- a/app/views/admin/create_system.rhtml +++ b/app/views/admin/create_system.rhtml @@ -7,7 +7,7 @@

<%= form_tag({ :controller => 'admin', :action => 'create_system' }, - { 'id' => 'setup', 'method' => 'post', 'onSubmit' => 'return validateSetup()', + { 'id' => 'setup', 'method' => 'post', 'onsubmit' => 'return validateSetup()', 'accept-charset' => 'utf-8' }) %>
    @@ -22,9 +22,9 @@
    Name: + onchange="proposeAddress();" onclick="this.value == 'Wiki' ? this.value = '' : true" />    - Address:
    diff --git a/app/views/admin/create_web.rhtml b/app/views/admin/create_web.rhtml index 5b9e3f7e..86c4675d 100644 --- a/app/views/admin/create_web.rhtml +++ b/app/views/admin/create_web.rhtml @@ -7,7 +7,7 @@ <%= form_tag({ :controller => 'admin', :action => 'create_web' }, { 'id' => 'setup', 'method' => 'post', - 'onSubmit' => 'cleanAddress(); return validateSetup()', + 'onsubmit' => 'cleanAddress(); return validateSetup()', 'accept-charset' => 'utf-8' }) %> @@ -21,9 +21,9 @@ The address can only consist of letters and digits.
    - Name: + Name:    - Address: + Address:
diff --git a/app/views/admin/edit_web.rhtml b/app/views/admin/edit_web.rhtml index 3062892f..50ff95f1 100644 --- a/app/views/admin/edit_web.rhtml +++ b/app/views/admin/edit_web.rhtml @@ -2,7 +2,7 @@ <%= form_tag({ :controller => 'admin', :action => 'edit_web', :web => @web.address }, { 'id' => 'setup', 'method' => 'post', - 'onSubmit' => 'cleanAddress(); return validateSetup()', + 'onsubmit' => 'cleanAddress(); return validateSetup()', 'accept-charset' => 'utf-8' }) %> @@ -15,9 +15,9 @@
Name:    + onchange="proposeAddress();" />    Address: + onchange="cleanAddress();" /> (Letters and digits only)
@@ -25,7 +25,7 @@
Markup: @@ -64,7 +64,7 @@

+ onclick="document.getElementById('additionalStyle').style.display='block';return false;"> Stylesheet tweaks >> - add or change styles used by this web; styles defined here take precedence over @@ -120,7 +120,7 @@ <%= form_tag({:controller => 'admin', :web => @web.address, :action => 'remove_orphaned_pages'}, { :id => 'remove_orphaned_pages', - :onSubmit => "return checkSystemPassword(document.getElementById('system_password_orphaned').value)", + :onsubmit => "return checkSystemPassword(document.getElementById('system_password_orphaned').value)", 'accept-charset' => 'utf-8' }) %>

diff --git a/app/views/file/import.rhtml b/app/views/file/import.rhtml index 910ccef4..0d71ee3a 100644 --- a/app/views/file/import.rhtml +++ b/app/views/file/import.rhtml @@ -13,7 +13,7 @@

as + onclick="this.value == 'AnonymousCoward' ? this.value = '' : true" /> <% if @page %> | <%= link_to 'Cancel', :web => @web.address, :action => 'file'%> (unlocks page) <% end %> diff --git a/app/views/layouts/default.rhtml b/app/views/layouts/default.rhtml index 67b85e06..6c3a8132 100644 --- a/app/views/layouts/default.rhtml +++ b/app/views/layouts/default.rhtml @@ -1,6 +1,4 @@ - + diff --git a/app/views/layouts/tex.rhtml b/app/views/layouts/tex.rhtml new file mode 100644 index 00000000..36ba17d8 --- /dev/null +++ b/app/views/layouts/tex.rhtml @@ -0,0 +1 @@ +<%= @content_for_layout %> diff --git a/app/views/markdownMML_help.rhtml b/app/views/markdownMML_help.rhtml new file mode 100644 index 00000000..6a29a158 --- /dev/null +++ b/app/views/markdownMML_help.rhtml @@ -0,0 +1,13 @@ +<h3>Markdown + itex2MML formatting tips (<a target="_new" href="http://daringfireball.net/projects/markdown/syntax">advanced</a>)</h3> +<table cellspacing="0" cellpadding="0"> + <tr><td>_your text_</td><td class="arrow">→</td><td><em>your text</em></td></tr> + <tr><td>**your text**</td><td class="arrow">→</td><td><strong>your text</strong></td></tr> + <tr><td>`my code`</td><td class="arrow">→</td><td><code>my code</code></td></tr> + <tr><td>* Bulleted list<br />* Second item</td><td class="arrow">→</td><td>• Bulleted list<br />• Second item</td></tr> + <tr><td>1. Numbered list<br />1. Second item</td><td class="arrow">→</td><td>1. Numbered list<br />2. 
Second item</td></tr> + <tr><td>[link name](URL)</td><td class="arrow">→</td><td><a href="URL">link name</a></td></tr> + <tr><td>***</td><td class="arrow">→</td><td>Horizontal ruler</td></tr> + <tr><td><http://url><br /><email@add.com></td><td class="arrow">→</td><td>Auto-linked</td></tr> + <tr><td>![Alt text](URL)</td><td class="arrow">→</td><td>Image</td></tr> +</table> +<p>For a complete list of LaTeX commands supported here, see the <a href="http://golem.ph.utexas.edu/~distler/blog/itex2MMLcommands.html">itex2MML Commands Summary</a>.</p> diff --git a/app/views/textile_help.rhtml b/app/views/textile_help.rhtml index 3d8400b3..78f9a91e 100644 --- a/app/views/textile_help.rhtml +++ b/app/views/textile_help.rhtml @@ -1,4 +1,4 @@ -<h3>Textile formatting tips (<a href="http://hobix.com/textile/quick.html" onClick="quickRedReference(); return false;">advanced</a>)</h3> +<h3>Textile formatting tips (<a href="http://hobix.com/textile/quick.html" onclick="quickRedReference(); return false;">advanced</a>)</h3> <table cellspacing="0" cellpadding="0"> <tr><td>_your text_</td><td class="arrow">→</td><td><em>your text</em></td></tr> <tr><td>*your text*</td><td class="arrow">→</td><td><strong>your text</strong></td></tr> diff --git a/app/views/wiki/edit.rhtml b/app/views/wiki/edit.rhtml index ad7df15d..dc6e6ae9 100644 --- a/app/views/wiki/edit.rhtml +++ b/app/views/wiki/edit.rhtml @@ -11,7 +11,7 @@ <div id="editForm"> <%= form_tag({ :action => 'save', :web => @web.address, :id => @page.name }, - { 'id' => 'editForm', 'method' => 'post', 'onSubmit' => 'cleanAuthorName()', + { 'id' => 'editForm', 'method' => 'post', 'onsubmit' => 'cleanAuthorName()', 'accept-charset' => 'utf-8' }) %> <textarea name="content" id="content"><%= h(@flash[:content] || @page.content) %></textarea> diff --git a/app/views/wiki/new.rhtml b/app/views/wiki/new.rhtml index cda26081..78086c1b 100644 --- a/app/views/wiki/new.rhtml +++ b/app/views/wiki/new.rhtml @@ -11,7 +11,7 @@ <div id="editForm"> <%= 
form_tag({ :action => 'save', :web => @web.address, :id => @page_name }, - { 'id' => 'editForm', 'method' => 'post', 'onSubmit' => 'cleanAuthorName();', 'accept-charset' => 'utf-8' }) %> + { 'id' => 'editForm', 'method' => 'post', 'onsubmit' => 'cleanAuthorName();', 'accept-charset' => 'utf-8' }) %> <textarea name="content" id="content"><%= h(@flash[:content] || '') %></textarea> <div id="editFormButtons"> diff --git a/app/views/wiki/page.rhtml b/app/views/wiki/page.rhtml index 725f1184..6974bbf6 100644 --- a/app/views/wiki/page.rhtml +++ b/app/views/wiki/page.rhtml @@ -37,7 +37,7 @@ <%= link_to('Print', { :web => @web.address, :action => 'print', :id => @page.name }, { :accesskey => 'p', :name => 'view_print' }) %> - <% if defined? RedClothForTex and RedClothForTex.available? and @web.markup == :textile %> + <% if defined? RedClothForTex and RedClothForTex.available? and @web.markup == :textile or @web.markup == :markdownMML %> | <%= link_to 'TeX', {:web => @web.address, :action => 'tex', :id => @page.name}, {:name => 'view_tex'} %> diff --git a/app/views/wiki/rollback.rhtml b/app/views/wiki/rollback.rhtml index 0e4cbea2..a165c71d 100644 --- a/app/views/wiki/rollback.rhtml +++ b/app/views/wiki/rollback.rhtml @@ -13,13 +13,13 @@ <div id="editForm"> <%= form_tag({:web => @web.address, :action => 'save', :id => @page.name}, - { :id => 'editForm', :method => 'post', :onSubmit => 'cleanAuthorName();', + { :id => 'editForm', :method => 'post', :onsubmit => 'cleanAuthorName();', 'accept-charset' => 'utf-8' }) %> <textarea name="content" id="content"><%= @revision.content %></textarea> <div id="editFormButtons"> <input type="submit" value="Update" accesskey="u" /> as <input type="text" name="author" id="authorName" value="<%= @author %>" - onClick="this.value == 'AnonymousCoward' ? this.value = '' : true" /> + onclick="this.value == 'AnonymousCoward' ? 
this.value = '' : true" /> | <span> <%= link_to('Cancel', {:web => @web.address, :action => 'cancel_edit', :id => @page.name}, diff --git a/app/views/wiki/tex.rhtml b/app/views/wiki/tex.rhtml index ea9a06c6..6ac2fa5d 100644 --- a/app/views/wiki/tex.rhtml +++ b/app/views/wiki/tex.rhtml @@ -1,12 +1,12 @@ \documentclass[12pt,titlepage]{article} -\usepackage[danish]{babel} %danske tekster +\usepackage{amsmath} +\usepackage{amsfonts} \usepackage[OT1]{fontenc} %rigtige danske bogstaver... -\usepackage{a4} \usepackage{graphicx} \usepackage{ucs} \usepackage[utf8x]{inputenc} -\input epsf +\usepackage{hyperref} %------------------------------------------------------------------- diff --git a/db/production.db.sqlite3 b/db/production.db.sqlite3 new file mode 100644 index 00000000..649297cd Binary files /dev/null and b/db/production.db.sqlite3 differ diff --git a/db/schema.rb b/db/schema.rb new file mode 100644 index 00000000..2147c8fb --- /dev/null +++ b/db/schema.rb @@ -0,0 +1,78 @@ +# This file is autogenerated. Instead of editing this file, please use the +# migrations feature of ActiveRecord to incrementally modify your database, and +# then regenerate this schema definition. 
+ +ActiveRecord::Schema.define(:version => 2) do + + create_table "pages", :force => true do |t| + t.column "created_at", :datetime, :null => false + t.column "updated_at", :datetime, :null => false + t.column "web_id", :integer, :default => 0, :null => false + t.column "locked_by", :string, :limit => 60 + t.column "name", :string, :limit => 60 + t.column "locked_at", :datetime + end + + create_table "revisions", :force => true do |t| + t.column "created_at", :datetime, :null => false + t.column "updated_at", :datetime, :null => false + t.column "revised_at", :datetime, :null => false + t.column "page_id", :integer, :default => 0, :null => false + t.column "content", :text, :default => "", :null => false + t.column "author", :string, :limit => 60 + t.column "ip", :string, :limit => 60 + end + + add_index "revisions", ["author"], :name => "revisions_author_index" + add_index "revisions", ["created_at"], :name => "revisions_created_at_index" + add_index "revisions", ["page_id"], :name => "revisions_page_id_index" + + create_table "sessions", :force => true do |t| + t.column "session_id", :string + t.column "data", :text + t.column "updated_at", :datetime + end + + add_index "sessions", ["session_id"], :name => "sessions_session_id_index" + + create_table "system", :force => true do |t| + t.column "password", :string, :limit => 60 + end + + create_table "webs", :force => true do |t| + t.column "created_at", :datetime, :null => false + t.column "updated_at", :datetime, :null => false + t.column "name", :string, :limit => 60, :default => "", :null => false + t.column "address", :string, :limit => 60, :default => "", :null => false + t.column "password", :string, :limit => 60 + t.column "additional_style", :string + t.column "allow_uploads", :integer, :default => 1 + t.column "published", :integer, :default => 0 + t.column "count_pages", :integer, :default => 0 + t.column "markup", :string, :limit => 50, :default => "textile" + t.column "color", :string, :limit => 6, 
:default => "008B26" + t.column "max_upload_size", :integer, :default => 100 + t.column "safe_mode", :integer, :default => 0 + t.column "brackets_only", :integer, :default => 0 + end + + create_table "wiki_files", :force => true do |t| + t.column "created_at", :datetime, :null => false + t.column "updated_at", :datetime, :null => false + t.column "web_id", :integer, :null => false + t.column "file_name", :string, :null => false + t.column "description", :string, :null => false + end + + create_table "wiki_references", :force => true do |t| + t.column "created_at", :datetime, :null => false + t.column "updated_at", :datetime, :null => false + t.column "page_id", :integer, :default => 0, :null => false + t.column "referenced_name", :string, :limit => 60, :default => "", :null => false + t.column "link_type", :string, :limit => 1, :default => "", :null => false + end + + add_index "wiki_references", ["referenced_name"], :name => "wiki_references_referenced_name_index" + add_index "wiki_references", ["page_id"], :name => "wiki_references_page_id_index" + +end diff --git a/instiki b/instiki index db95d002..bf720a1c 100755 --- a/instiki +++ b/instiki @@ -1,7 +1,6 @@ -#!/bin/sh +#!/usr/bin/env ruby -cd $(dirname $0) - -export LD_LIBRARY_PATH=./lib/native/linux-x86:$LD_LIBRARY_PATH -ruby script/server +# Executable file for a gem +# must be same as ./instiki.rb +load File.dirname(__FILE__) + '/script/server' diff --git a/lib/bluecloth_tweaked.rb b/lib/bluecloth_tweaked.rb index b91622f1..92883441 100644 --- a/lib/bluecloth_tweaked.rb +++ b/lib/bluecloth_tweaked.rb @@ -1112,7 +1112,7 @@ class BlueCloth < String ### Return a copy of +str+ with angle brackets and ampersands HTML-encoded. def encode_html( str ) - str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w{1,8});)/i, "&" ). + str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w+);)/i, "&" ). 
gsub( %r{<(?![a-z/?\$!])}i, "<" ) end diff --git a/lib/chunks/engines.rb b/lib/chunks/engines.rb index 3b7b5bbe..82808123 100644 --- a/lib/chunks/engines.rb +++ b/lib/chunks/engines.rb @@ -40,6 +40,14 @@ module Engines end end + class MarkdownMML < AbstractEngine + def mask + require_dependency 'maruku' + require_dependency 'maruku/ext/math' + Maruku.new(@content.delete("\r")).to_html + end + end + class Mixed < AbstractEngine def mask require_dependency 'redcloth' @@ -57,6 +65,6 @@ module Engines end end - MAP = { :textile => Textile, :markdown => Markdown, :mixed => Mixed, :rdoc => RDoc } + MAP = { :textile => Textile, :markdown => Markdown, :markdownMML => MarkdownMML, :mixed => Mixed, :rdoc => RDoc } MAP.default = Textile end diff --git a/lib/maruku.rb b/lib/maruku.rb new file mode 100644 index 00000000..c9e26ab1 --- /dev/null +++ b/lib/maruku.rb @@ -0,0 +1,133 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + +require 'rexml/document' + +# :include:MaRuKu.txt +module MaRuKu + + module In + module Markdown + module SpanLevelParser; end + module BlockLevelParser; end + end + # more to come? + end + + module Out + # Functions for exporting to MarkDown. + module Markdown; end + # Functions for exporting to HTML. 
+ module HTML; end + # Functions for exporting to Latex + module Latex; end + end + + # These are strings utilities. + module Strings; end + + module Helpers; end + + module Errors; end + + class MDElement + include REXML + include MaRuKu + include Out::Markdown + include Out::HTML + include Out::Latex + include Strings + include Helpers + include Errors + end + + + class MDDocument < MDElement + include In::Markdown + include In::Markdown::SpanLevelParser + include In::Markdown::BlockLevelParser + end +end + +# This is the public interface +class Maruku < MaRuKu::MDDocument; end + + + +require 'rexml/document' + +# Structures definition +require 'maruku/structures' +require 'maruku/structures_inspect' + +require 'maruku/defaults' +# Less typing +require 'maruku/helpers' + +# Code for parsing whole Markdown documents +require 'maruku/input/parse_doc' + +# Ugly things kept in a closet +require 'maruku/string_utils' +require 'maruku/input/linesource' +require 'maruku/input/type_detection' + +# A class for reading and sanitizing inline HTML +require 'maruku/input/html_helper' + +# Code for parsing Markdown block-level elements +require 'maruku/input/parse_block' + +# Code for parsing Markdown span-level elements +require 'maruku/input/charsource' +require 'maruku/input/parse_span_better' +require 'maruku/input/rubypants' + +require 'maruku/input/extensions' + +require 'maruku/attributes' + +require 'maruku/structures_iterators' + +require 'maruku/errors_management' + +# Code for creating a table of contents +require 'maruku/toc' + +# Version and URL +require 'maruku/version' + + +# Exporting to html +require 'maruku/output/to_html' + +# Exporting to latex +require 'maruku/output/to_latex' +require 'maruku/output/to_latex_strings' +require 'maruku/output/to_latex_entities' + +# Pretty print +require 'maruku/output/to_markdown' + +# Exporting to text: strips all formatting (not complete) +require 'maruku/output/to_s' + +# class Maruku is the global interface +require 
'maruku/maruku' diff --git a/lib/maruku/attic/parse_span.rb.txt b/lib/maruku/attic/parse_span.rb.txt new file mode 100644 index 00000000..71c57711 --- /dev/null +++ b/lib/maruku/attic/parse_span.rb.txt @@ -0,0 +1,462 @@ +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + + + + +# NOTE: this is the old span-level regexp-based parser. +# +# The new parser is a real parser and is defined with functions in parse_span_better.rb +# The new parser is faster, handles syntax errors, but it's absolutely not readable. +# +# Also, regexp parsers simply CANNOT handle inline HTML properly. + + + +# There are two black-magic methods `match_couple_of` and `map_match`, +# defined at the end of the file, that make the function +# `parse_lines_as_span` so elegant. + +class Maruku + + # Takes care of all span-level formatting, links, images, etc. + # + # Lines must not contain block-level elements. + def parse_lines_as_span(lines) + + # first, get rid of linebreaks + res = resolve_linebreaks(lines) + + span = MDElement.new(:dummy, res) + + # encode all escapes + span.replace_each_string { |s| s.escape_md_special } + + +# The order of processing is significant: +# 1. inline code +# 2. immediate links +# 3. inline HTML +# 4. 
everything else + + # search for ``code`` markers + span.match_couple_of('``') { |children, match1, match2| + e = create_md_element(:inline_code) + # this is now opaque to processing + e.meta[:raw_code] = children.join('').it_was_a_code_block + e + } + + # Search for `single tick` code markers + span.match_couple_of('`') { |children, match1, match2| + e = create_md_element(:inline_code) + # this is now opaque to processing + e.meta[:raw_code] = children.join('').it_was_a_code_block + # this is now opaque to processing + e + } + + # Detect any immediate link: <http://www.google.com> + # we expect an http: or something: at the beginning + span.map_match( /<(\w+:[^\>]+)>/) { |match| + url = match[1] + + e = create_md_element(:immediate_link, []) + e.meta[:url] = url + e + } + + # Search for inline HTML (the support is pretty basic for now) + + # this searches for a matching block + inlineHTML1 = %r{ + ( # put everything in 1 + < # open + (\w+) # opening tag in 2 + > # close + .* # anything + </\2> # match closing tag + ) + }x + + # this searches for only one block + inlineHTML2 = %r{ + ( # put everything in 1 + < # open + \w+ # + # close + [^<>]* # anything except + /> # closing tag + ) + }x + + for reg in [inlineHTML1, inlineHTML2] + span.map_match(reg) { |match| + raw_html = match[1] + convert_raw_html_in_list(raw_html) + } + end + + # Detect footnotes references: [^1] + span.map_match(/\[(\^[^\]]+)\]/) { |match| + id = match[1].strip.downcase + e = create_md_element(:footnote_reference) + e.meta[:footnote_id] = id + e + } + + # Detect any image like ![Alt text][url] + span.map_match(/\!\[([^\]]+)\]\s?\[([^\]]*)\]/) { |match| + alt = match[1] + id = match[2].strip.downcase + + if id.size == 0 + id = text.strip.downcase + end + + e = create_md_element(:image) + e.meta[:ref_id] = id + e + } + + # Detect any immage with immediate url: ![Alt](url "title") + # a dummy ref is created and put in the symbol table + link1 = 
/!\[([^\]]+)\]\s?\(([^\s\)]*)(?:\s+["'](.*)["'])?\)/ + span.map_match(link1) { |match| + alt = match[1] + url = match[2] + title = match[3] + + url = url.strip + # create a dummy id + id="dummy_#{@refs.size}" + @refs[id] = {:url=>url, :title=>title} + + e = create_md_element(:image) + e.meta[:ref_id] = id + e + } + + # an id reference: "[id]", "[ id ]" + reg_id_ref = %r{ + \[ # opening bracket + ([^\]]*) # 0 or more non-closing bracket (this is too permissive) + \] # closing bracket + }x + + + # validates a url, only $1 is set to the url + reg_url = + /((?:\w+):\/\/(?:\w+:{0,1}\w*@)?(?:\S+)(?::[0-9]+)?(?:\/|\/([\w#!:.?+=&%@!\-\/]))?)/ + reg_url = %r{([^\s\]\)]+)} + + # A string enclosed in quotes. + reg_title = %r{ + " # opening + [^"]* # anything = 1 + " # closing + }x + + # [bah](http://www.google.com "Google.com"), + # [bah](http://www.google.com), + # [empty]() + reg_url_and_title = %r{ + \( # opening + \s* # whitespace + #{reg_url}? # url = 1 might be empty + (?:\s+["'](.*)["'])? # optional title = 2 + \s* # whitespace + \) # closing + }x + + # Detect a link like ![Alt text][id] + span.map_match(/\[([^\]]+)\]\s?\[([^\]]*)\]/) { |match| + text = match[1] + id = match[2].strip.downcase + + if id.size == 0 + id = text.strip.downcase + end + + children = parse_lines_as_span(text) + e = create_md_element(:link, children) + e.meta[:ref_id] = id + e + } + + # Detect any immage with immediate url: ![Alt](url "title") + # a dummy ref is created and put in the symbol table + link1 = /!\[([^\]]+)\]\s?\(([^\s\)]*)(?:\s+["'](.*)["'])?\)/ + span.map_match(link1) { |match| + text = match[1] + children = parse_lines_as_span(text) + + url = match[2] + title = match[3] + + url = url.strip + # create a dummy id + id="dummy_#{@refs.size}" + @refs[id] = {:url=>url, :title=>title} + @refs[id][:title] = title if title + + e = create_md_element(:link, children) + e.meta[:ref_id] = id + e + } + + + # Detect any link like [Google engine][google] + span.match_couple_of('[', # opening 
bracket + %r{\] # closing bracket + [ ]? # optional whitespace + #{reg_id_ref} # ref id, with $1 being the reference + }x + ) { |children, match1, match2| + id = match2[1] + id = id.strip.downcase + + if id.size == 0 + id = children.join.strip.downcase + end + + e = create_md_element(:link, children) + e.meta[:ref_id] = id + e + } + + # Detect any link with immediate url: [Google](http://www.google.com) + # XXX Note that the url can be empty: [Empty]() + # a dummy ref is created and put in the symbol table + span.match_couple_of('[', # opening bracket + %r{\] # closing bracket + [ ]? # optional whitespace + #{reg_url_and_title} # ref id, with $1 being the url and $2 being the title + }x + ) { |children, match1, match2| + + url = match2[1] + title = match2[3] # XXX? Is it a bug? I would use [2] + + # create a dummy id + id="dummy_#{@refs.size}" + @refs[id] = {:url=>url} + @refs[id][:title] = title if title + + e = create_md_element(:link, children) + e.meta[:ref_id] = id + e + } + + # Detect an email address <andrea@invalid.it> + span.map_match(EMailAddress) { |match| + email = match[1] + e = create_md_element(:email_address, []) + e.meta[:email] = email + e + } + + # Detect HTML entitis + span.map_match(/&([\w\d]+);/) { |match| + entity_name = match[1] + + e = create_md_element(:entity, []) + e.meta[:entity_name] = entity_name + e + } + + + # And now the easy stuff + + # search for ***strong and em*** + span.match_couple_of('***') { |children,m1,m2| + create_md_element(:strong, [create_md_element(:emphasis, children)] ) } + + span.match_couple_of('___') { |children,m1,m2| + create_md_element(:strong, [create_md_element(:emphasis, children)] ) } + + # search for **strong** + span.match_couple_of('**') { |children,m1,m2| create_md_element(:strong, children) } + + # search for __strong__ + span.match_couple_of('__') { |children,m1,m2| create_md_element(:strong, children) } + + # search for *emphasis* + span.match_couple_of('*') { |children,m1,m2| 
create_md_element(:emphasis, children) } + + # search for _emphasis_ + span.match_couple_of('_') { |children,m1,m2| create_md_element(:emphasis, children) } + + # finally, unescape the special characters + span.replace_each_string { |s| s.unescape_md_special} + + span.children + end + + # returns array containing Strings or :linebreak elements + def resolve_linebreaks(lines) + res = [] + s = "" + lines.each do |l| + s += (s.size>0 ? " " : "") + l.strip + if force_linebreak?(l) + res << s + res << create_md_element(:linebreak) + s = "" + end + end + res << s if s.size > 0 + res + end + + # raw_html is something like + # <em> A</em> dopwkk *maruk* <em>A</em> + def convert_raw_html_in_list(raw_html) + e = create_md_element(:raw_html) + e.meta[:raw_html] = raw_html + begin + e.meta[:parsed_html] = Document.new(raw_html) + rescue + $stderr.puts "convert_raw_html_in_list Malformed HTML:\n#{raw_html}" + end + e + end + +end + +# And now the black magic that makes the part above so elegant +class MDElement + + # Try to match the regexp to each string in the hierarchy + # (using `replace_each_string`). If the regexp match, eliminate + # the matching string and substitute it with the pre_match, the + # result of the block, and the post_match + # + # ..., matched_string, ... -> ..., pre_match, block.call(match), post_match + # + # the block might return arrays. + # + def map_match(regexp, &block) + replace_each_string { |s| + processed = [] + while (match = regexp.match(s)) + # save the pre_match + processed << match.pre_match if match.pre_match && match.pre_match.size>0 + # transform match + result = block.call(match) + # and append as processed + [*result].each do |e| processed << e end + # go on with the rest of the string + s = match.post_match + end + processed << s if s.size > 0 + processed + } + end + + # Finds couple of delimiters in a hierarchy of Strings and MDElements + # + # Open and close are two delimiters (like '[' and ']'), or two Regexp. 
+ # + # If you don't pass close, it defaults to open. + # + # Each block is called with |contained children, match1, match2| + def match_couple_of(open, close=nil, &block) + close = close || open + open_regexp = open.kind_of?(Regexp) ? open : Regexp.new(Regexp.escape(open)) + close_regexp = close.kind_of?(Regexp) ? close : Regexp.new(Regexp.escape(close)) + + # Do the same to children first + for c in @children; if c.kind_of? MDElement + c.match_couple_of(open_regexp, close_regexp, &block) + end end + + processed_children = [] + + until @children.empty? + c = @children.shift + if c.kind_of? String + match1 = open_regexp.match(c) + if not match1 + processed_children << c + else # we found opening, now search closing +# puts "Found opening (#{marker}) in #{c.inspect}" + # pre match is processed + processed_children.push match1.pre_match if + match1.pre_match && match1.pre_match.size > 0 + # we will process again the post_match + @children.unshift match1.post_match if + match1.post_match && match1.post_match.size>0 + + contained = []; found_closing = false + until @children.empty? || found_closing + c = @children.shift + if c.kind_of? 
String + match2 = close_regexp.match(c) + if not match2 + contained << c + else + # we found closing + found_closing = true + # pre match is contained + contained.push match2.pre_match if + match2.pre_match && match2.pre_match.size>0 + # we will process again the post_match + @children.unshift match2.post_match if + match2.post_match && match2.post_match.size>0 + + # And now we call the block + substitute = block.call(contained, match1, match2) + processed_children << substitute + +# puts "Found closing (#{marker}) in #{c.inspect}" +# puts "Children: #{contained.inspect}" +# puts "Substitute: #{substitute.inspect}" + end + else + contained << c + end + end + + if not found_closing + # $stderr.puts "##### Could not find closing for #{open}, #{close} -- ignoring" + processed_children << match1.to_s + contained.reverse.each do |c| + @children.unshift c + end + end + end + else + processed_children << c + end + end + + raise "BugBug" unless @children.empty? + + rebuilt = [] + # rebuild strings + processed_children.each do |c| + if c.kind_of?(String) && rebuilt.last && rebuilt.last.kind_of?(String) + rebuilt.last << c + else + rebuilt << c + end + end + @children = rebuilt + end +end diff --git a/lib/maruku/attributes.rb b/lib/maruku/attributes.rb new file mode 100644 index 00000000..07736186 --- /dev/null +++ b/lib/maruku/attributes.rb @@ -0,0 +1,218 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++


class String
  # Returns the receiver unchanged unless it contains whitespace or a quote
  # character; in that case returns its +inspect+ form (double-quoted and
  # escaped) so it can be written back into an attribute list.
  def quote_if_needed
    if /[\s\'\"]/.match self
      inspect
    else
      self
    end
  end
end

module MaRuKu
  MagicChar = ':'

  # An attribute list such as
  #     {#id .cl key="val" ref}
  # is stored as an array of pairs:
  #     [ [:id, 'id'], [:class, 'cl'], ['key', 'val'], [:ref, 'ref'] ]
  class AttributeList < Array

    # Entries must go through the typed push_* methods below.
    private :push

    # Appends a key="val" pair. Both parts are required.
    def push_key_val(key, val)
      # The original guard was `if not key and val`, which Ruby parses as
      # `(not key) and val` and therefore let a nil value through.
      raise "Bad #{key.inspect}=#{val.inspect}" unless key && val
      push [key, val]
    end

    # Appends a reference to a named attribute list definition.
    def push_ref(ref_id)
      raise "Bad :ref #{ref_id.inspect}" unless ref_id
      push [:ref, ref_id]
    end

    # Appends a CSS class (".cl").
    def push_class(val)
      raise "Bad :class #{val.inspect}" unless val
      push [:class, val]
    end

    # Appends an element id ("#id").
    def push_id(val)
      raise "Bad :id #{val.inspect}" unless val
      push [:id, val]
    end

    # Serializes the list back to its Markdown source form (without braces).
    def to_s
      map do |k, v|
        case k
        when :id;    "#" + v.quote_if_needed
        when :class; "." + v.quote_if_needed
        when :ref;   v.quote_if_needed
        else k.quote_if_needed + "=" + v.quote_if_needed
        end
      end.join(' ')
    end
    alias to_md to_s
  end

end

module MaRuKu; module In; module Markdown; module SpanLevelParser

  # Table of [source, expected, comment] cases for attribute-list parsing.
  # Rows that omit +expected+ and/or +comment+ inherit them from the previous
  # row; inherited comments get a running number appended.
  def unit_tests_for_attribute_lists
    [
      [ "", [], "Empty lists are allowed" ],
      [ "=", :throw, "Bad char to begin a list with." ],
      [ "a =b", :throw, "No whitespace before `=`." ],
      [ "a= b", :throw, "No whitespace after `=`." ],

      [ "a b c", [[:ref, 'a'],[:ref, 'b'],[:ref, 'c']], "More than one ref" ],
      [ "hello notfound", [[:ref, 'hello'],[:ref, 'notfound']]],

      [ "'a'", [[:ref, 'a']], "Quoted value." ],
      [ '"a"' ],

      [ "a=b", [['a','b']], "Simple key/val" ],
      [ "'a'=b" ],
      [ "'a'='b'" ],
      [ "a='b'" ],

      [ 'a="b\'"', [['a',"b\'"]], "Key/val with quotes" ],
      [ 'a=b\''],
      [ 'a="\\\'b\'"', [['a',"\'b\'"]], "Key/val with quotes" ],

      ['"', :throw, "Unclosed quotes"],
      ["'"],
      ["'a "],
      ['"a '],

      [ "#a", [[:id, 'a']], "Simple ID" ],
      [ "#'a'" ],
      [ '#"a"' ],

      [ "#", :throw, "Unfinished '#'." ],
      [ ".", :throw, "Unfinished '.'." ],
      [ "# a", :throw, "No white-space after '#'." ],
      [ ". a", :throw, "No white-space after '.' ." ],

      [ "a=b c=d", [['a','b'],['c','d']], "Tabbing" ],
      [ " \ta=b \tc='d' "],
      [ "\t a=b\t c='d'\t\t"],

      [ ".\"a'", :throw, "Mixing quotes is bad." ],

    ].map { |s, expected, comment|
      @expected = (expected ||= @expected)
      @comment = (comment ||= (last=@comment) )
      (comment == last && (comment += (@count+=1).to_s)) || @count = 1
      expected = [md_ial(expected)] if expected.kind_of? Array
      ["{#{MagicChar}#{s}}", expected, "Attributes: #{comment}"]
    }
  end

  # Builds an AttributeList from an array of pairs (empty by default).
  def md_al(s=[]); AttributeList.new(s) end

  # Reads an attribute list from +src+ until one of +break_on_chars+ is the
  # current char. Parse problems are reported through maruku_error and the
  # entries read so far are kept. Always returns an AttributeList.
  def read_attribute_list(src, con, break_on_chars)
    separators = break_on_chars + [?=,?\ ,?\t]
    escaped = Maruku::EscapedCharInQuotes

    al = AttributeList.new
    while true
      src.consume_whitespace
      break if break_on_chars.include? src.cur_char

      case src.cur_char
      when nil
        maruku_error "Attribute list terminated by EOF:\n "+
          "#{al.inspect}" , src, con
        tell_user "I try to continue and return partial attribute list:\n"+
          al.inspect
        break
      when ?= # error
        # src/con are passed like in every other branch so the report
        # includes the position of the offending character.
        maruku_error "In attribute lists, cannot start identifier with `=`.",
          src, con
        tell_user "I try to continue"
        src.ignore_char
      when ?# # id definition
        src.ignore_char
        if id = read_quoted_or_unquoted(src, con, escaped, separators)
          al.push_id id
        else
          maruku_error 'Could not read `id` attribute.', src, con
          tell_user 'Trying to ignore bad `id` attribute.'
        end
      when ?. # class definition
        src.ignore_char
        if klass = read_quoted_or_unquoted(src, con, escaped, separators)
          al.push_class klass
        else
          maruku_error 'Could not read `class` attribute.', src, con
          tell_user 'Trying to ignore bad `class` attribute.'
        end
      else
        if key = read_quoted_or_unquoted(src, con, escaped, separators)
          if src.cur_char == ?=
            src.ignore_char # skip the =
            if val = read_quoted_or_unquoted(src, con, escaped, separators)
              al.push_key_val(key, val)
            else
              maruku_error "Could not read value for key #{key.inspect}.",
                src, con
              tell_user "Ignoring key #{key.inspect}."
            end
          else
            al.push_ref key
          end
        else
          maruku_error 'Could not read key or reference.', src, con
        end
      end # case
    end # while true
    al
  end

  # True when +e+ is an inline-attribute-list (:ial) element.
  # Previously defined by a nested `def` that re-ran on every merge_ial call.
  def is_ial(e)
    e.kind_of?(MDElement) && e.node_type == :ial
  end

  # Attaches every IAL in +elements+ (except one in first position) to the
  # element before it, or failing that to the element after it, then removes
  # the IAL elements unless Globals[:debug_keep_ials] is set.
  def merge_ial(elements, src, con)
    # Apply each IAL to the element before
    elements.each_with_index do |e, i|
      if is_ial(e) && i >= 1 then
        before = elements[i-1]
        after = elements[i+1]
        if before.kind_of? MDElement
          before.al = e.ial
        elsif after.kind_of? MDElement
          after.al = e.ial
        else
          maruku_error "I don't know who you are referring to:"+
            " {#{e.ial.to_md}}", src, con
          # TODO: report whether there is an empty element nearby
          maruku_recover "Ignoring IAL: {#{e.ial.to_md}}", src, con
        end
      end
    end

    if not Globals[:debug_keep_ials]
      # An IAL in first position is deliberately kept.
      elements.delete_if {|x| is_ial(x) unless x == elements.first}
    end
  end

end end end end
#module MaRuKu; module In; module Markdown; module SpanLevelParser

# diff --git a/lib/maruku/defaults.rb b/lib/maruku/defaults.rb
# new file mode 100644
# index 00000000..2d4af38e
# --- /dev/null
# +++ b/lib/maruku/defaults.rb
# @@ -0,0 +1,52 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++


module MaRuKu

  # Library-wide defaults, consulted by MDElement#get_setting when neither
  # the element nor its document overrides a setting.
  Globals = {
    :unsafe_features => false,

    :debug_keep_ials => false,

    :maruku_signature => false,
    :code_background_color => '#fef',
    :code_show_spaces => false,
    :html_math_engine => 'itex2mml', #ritex, itex2mml, none
    :html_use_syntax => false,
    :on_error => :warning
  }

  class MDElement
    # Looks up setting +sym+, most specific scope first: this element's
    # attributes, then the owning document's attributes, then
    # MaRuKu::Globals. An unknown setting is reported on $stderr and
    # yields nil.
    def get_setting(sym)
      return self.attributes[sym] if self.attributes.has_key?(sym)

      owner = self.doc
      return owner.attributes[sym] if owner && owner.attributes.has_key?(sym)

      return MaRuKu::Globals[sym] if MaRuKu::Globals.has_key?(sym)

      $stderr.puts "Bug: no default for #{sym.inspect}"
      nil
    end
  end

end

# diff --git a/lib/maruku/errors_management.rb b/lib/maruku/errors_management.rb
# new file mode 100644
# index 00000000..387acea8
# --- /dev/null
# +++ b/lib/maruku/errors_management.rb
# @@ -0,0 +1,92 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++



#m Any method that detects a formatting error calls the
#m maruku_error() method, which acts according to
#m get_setting(:on_error):
#m
#m - :warning writes the report to attributes[:error_stream]
#m            (or $stderr when that is not set), then does its best.
#m - :ignore  is shy and tries to continue.
#m - :raise   raises a MaRuKu::Exception.
#m
#m The library-wide default is MaRuKu::Globals[:on_error], i.e. :warning.

module MaRuKu

  # Raised by maruku_error when the :on_error policy is :raise.
  class Exception < RuntimeError
  end

module Errors

  # Reports a formatting error according to the :on_error policy.
  #
  # s   - the message.
  # src - optional source object responding to #describe (current position).
  # con - optional context object responding to #describe.
  #
  # Raises MaRuKu::Exception under :raise, and a plain RuntimeError when the
  # policy is not one of :ignore, :raise, :warning.
  def maruku_error(s,src=nil,con=nil)
    policy = get_setting(:on_error)

    case policy
    when :ignore
      # deliberately silent
    when :raise
      raise_error create_frame(describe_error(s,src,con))
    when :warning
      tell_user create_frame(describe_error(s,src,con))
    else
      raise "BugBug: policy = #{policy.inspect}"
    end
  end

  # Tells the user how an error was recovered from. Always goes to the error
  # stream, whatever the :on_error policy.
  def maruku_recover(s,src=nil,con=nil)
    tell_user create_frame(describe_error(s,src,con))
  end

  alias error maruku_error

  # Raises MaRuKu::Exception with message +s+ and the caller's backtrace.
  def raise_error(s)
    raise MaRuKu::Exception, s, caller
  end

  # Appends +s+ to attributes[:error_stream], or to $stderr if none is set.
  def tell_user(s)
    error_stream = self.attributes[:error_stream] || $stderr
    error_stream << s
  end

  # Wraps message +s+ in an ASCII frame followed by the five innermost
  # caller lines. Relies on #add_tabs being available on the host object.
  def create_frame(s)
    n = 75
    trace = caller[0, 5].join("\n")
    # One entry per output line; the empty first and last entries give the
    # leading and trailing newline of the original layout.
    [
      "",
      " " + "_" * n,
      "| Maruku tells you:",
      "+" + "-" * n,
      add_tabs(s, 1, '| '),
      "+" + "-" * n,
      add_tabs(trace, 1, '!'),
      "\\" + "_" * n,
      ""
    ].join("\n")
  end

  # Returns +s+ followed by the descriptions of +src+ and +con+ when given.
  # Never modifies +s+.
  def describe_error(s,src,con)
    t = s
    t += "\n#{src.describe}\n" if src
    t += "\n#{con.describe}\n" if con
    t
  end

end # Errors
end # MaRuKu


# diff --git a/lib/maruku/ext/math.rb b/lib/maruku/ext/math.rb
# new file mode 100644
# index 00000000..55b51ceb
# --- /dev/null
# +++
# b/lib/maruku/ext/math.rb @@ -0,0 +1,10 @@
#
# ---- lib/maruku/ext/math.rb -------------------------------------------------
# Entry point of the math extension: loads the element constructors, the
# parser hooks, the two output back ends and the MathML engines.

require 'maruku/ext/math/elements'
require 'maruku/ext/math/parsing'
require 'maruku/ext/math/to_latex'
require 'maruku/ext/math/to_html'

require 'maruku/ext/math/mathml_engines/none'
require 'maruku/ext/math/mathml_engines/ritex'
require 'maruku/ext/math/mathml_engines/itex2mml'

# ---- lib/maruku/ext/math/elements.rb ----------------------------------------
# (diff --git a/lib/maruku/ext/math/elements.rb b/lib/maruku/ext/math/elements.rb
#  new file mode 100644, index 00000000..5f9c4dfa, @@ -0,0 +1,26 @@)

module MaRuKu; class MDElement

  # Creates an :inline_math element holding the TeX source +math+.
  def md_inline_math(math)
    self.md_el(:inline_math, [], {:math=>math})
  end

  # Creates an :equation element for the TeX source +math+.
  #
  # A "\label{name}" inside +math+ overrides +label+ and is stripped from the
  # stored source. Labelled equations are numbered in order of creation and
  # registered in @doc.eqid2eq (when the element has a document).
  def md_equation(math, label=nil)
    reglabel = /\\label\{(\w+)\}/
    if math =~ reglabel
      label = $1
      # Non-destructive on purpose: do not modify the caller's string.
      math = math.gsub(reglabel, '')
    end
    num = nil
    if label && @doc # take number
      @doc.eqid2eq ||= {}
      num = @doc.eqid2eq.size + 1
    end
    e = self.md_el(:equation, [], {:math=>math, :label=>label, :num=>num})
    if label && @doc # register under its label
      @doc.eqid2eq[label] = e
    end
    e
  end

end end

# ---- lib/maruku/ext/math/mathml_engines/itex2mml.rb -------------------------
# (diff --git a/lib/maruku/ext/math/mathml_engines/itex2mml.rb
#  b/lib/maruku/ext/math/mathml_engines/itex2mml.rb
#  new file mode 100644, index 00000000..4f5be42e, @@ -0,0 +1,35 @@)

module MaRuKu; module Out; module HTML

  # Converts +tex+ to MathML with the itex2MML binding.
  #
  # method - :inline_filter or :block_filter, sent to the Itex2MML parser.
  #
  # Returns the root REXML element, or nil after reporting the problem
  # through maruku_error.
  def convert_to_mathml_itex2mml(tex, method)
    begin
      if not $itex2mml_parser
        require 'itextomml' # loaded lazily: the engine is optional
        $itex2mml_parser = Itex2MML::Parser.new
      end

      mathml = $itex2mml_parser.send(method, tex)
      doc = Document.new(mathml, {:respect_whitespace =>:all}).root
      return doc
    rescue LoadError => e
      maruku_error "Could not load package 'itex2mml'.\n"+
        "Please install it."
    rescue REXML::ParseException => e
      maruku_error "Invalid MathML TeX: \n#{add_tabs(tex,1,'tex>')}"+
        "\n\n #{e.inspect}"
    rescue => e
      # The original bare `rescue` never bound +e+, so the report always
      # ended in "nil" instead of the actual exception.
      maruku_error "Could not produce MathML TeX: \n#{tex}"+
        "\n\n #{e.inspect}"
    end
    nil
  end

  def to_html_inline_math_itex2mml
    convert_to_mathml_itex2mml(self.math, :inline_filter)
  end

  def to_html_equation_itex2mml
    convert_to_mathml_itex2mml(self.math, :block_filter)
  end

end end end

# ---- lib/maruku/ext/math/mathml_engines/none.rb -----------------------------
# (diff --git a/lib/maruku/ext/math/mathml_engines/none.rb
#  b/lib/maruku/ext/math/mathml_engines/none.rb
#  new file mode 100644, index 00000000..5fd04ec4, @@ -0,0 +1,20 @@)

module MaRuKu; module Out; module HTML

  # Fallback "engine": shows the TeX source inside a <code> element.
  def to_html_inline_math_none
    # You can: either return a REXML::Element
    #    return Element.new 'div'
    # or return an empty array on error
    #    return []
    # or have a string parsed by REXML:
    #
    # Escape on a copy: to_html_equation calls this method more than once
    # per element, and escaping self.math in place would escape it again on
    # every call. '<' is escaped too, otherwise REXML cannot parse the string.
    tex = self.math.gsub('&', '&amp;').gsub('<', '&lt;')
    mathml = "<code>#{tex}</code>"
    return Document.new(mathml).root
  end

  def to_html_equation_none
    return to_html_inline_math_none
  end

end end end

# ---- lib/maruku/ext/math/mathml_engines/ritex.rb ----------------------------
# (diff --git a/lib/maruku/ext/math/mathml_engines/ritex.rb
#  b/lib/maruku/ext/math/mathml_engines/ritex.rb
#  new file mode 100644, index 00000000..fbb753b0, @@ -0,0 +1,34 @@)

module MaRuKu; module Out; module HTML

  # Converts +tex+ to MathML with the ritex gem. Returns the root REXML
  # element, or nil after reporting the problem through maruku_error.
  def convert_to_mathml_ritex(tex)
    begin
      if not $ritex_parser
        require 'ritex' # loaded lazily: the engine is optional
        $ritex_parser = Ritex::Parser.new
      end

      mathml = $ritex_parser.parse(tex.strip)
      doc = Document.new(mathml, {:respect_whitespace =>:all}).root
      return doc
    rescue LoadError => e
      maruku_error "Could not load package 'ritex'.\n"+
        "Please install it using:\n"+
        " $ gem install ritex\n\n"+e.inspect
    rescue Racc::ParseError => e
      maruku_error "Could not parse TeX: \n#{tex}"+
        "\n\n #{e.inspect}"
    end
    nil
  end

  def to_html_inline_math_ritex
    tex = self.math
    mathml = convert_to_mathml_ritex(tex)
    return mathml || []
  end

  def to_html_equation_ritex
    tex = self.math
    mathml = convert_to_mathml_ritex(tex)
    return mathml || []
  end
end end end

# ---- lib/maruku/ext/math/parsing.rb -----------------------------------------
# (diff --git a/lib/maruku/ext/math/parsing.rb b/lib/maruku/ext/math/parsing.rb
#  new file mode 100644, index 00000000..ff317afb, @@ -0,0 +1,82 @@)

module MaRuKu
  class MDDocument
    # Hash equation id (String) to equation element (MDElement)
    attr_accessor :eqid2eq
  end
end


# At least one slash inside
#RegInlineMath1 = /\$([^\$]*[\\][^\$]*)\$/
# No spaces around the delimiters
#RegInlineMath2 = /\$([^\s\$](?:[^\$]*[^\s\$])?)\$/
#RegInlineMath = Regexp::union(RegInlineMath1,RegInlineMath2)

# Everything goes; takes care of escaping the "\$" inside the expression.
# The escaped-dollar alternative must be tried first: with `[^\$]` first the
# backslash was consumed on its own and the dollar after it closed the span.
RegInlineMath = /\${1}((?:\\\$|[^\$])+)\$/

MaRuKu::In::Markdown.register_span_extension(:chars => ?$, :regexp => RegInlineMath) do |doc, src, con|
  if m = src.read_regexp(RegInlineMath)
    math = m.captures.compact.first
    con.push doc.md_inline_math(math)
    true
  else
    #puts "not math: #{src.cur_chars 10}"
    false
  end
end

EquationStart = /^[ ]{0,3}(?:\\\[|\$\$)(.*)$/

EqLabel = /(?:\((\w+)\))/
OneLineEquation = /^[ ]{0,3}(?:\\\[|\$\$)(.*)(?:\\\]|\$\$)\s*#{EqLabel}?\s*$/
EquationEnd = /^(.*)(?:\\\]|\$\$)\s*#{EqLabel}?\s*$/

# Display equations: "\[ ... \]" or "$$ ... $$", optionally followed by a
# "(label)", on one line or spread over several.
MaRuKu::In::Markdown.register_block_extension(:regexp => EquationStart) do |doc, src, con|
  first = src.shift_line
  if first =~ OneLineEquation
    con.push doc.md_equation($1, $2)
  else
    first =~ EquationStart
    math = $1
    label = nil
    while true
      if not src.cur_line
        # This block is defined at file level, so the error helpers are
        # called on +doc+ rather than on the block's own self.
        # NOTE(review): assumes the document responds to maruku_error and
        # add_tabs, as the elements in to_html.rb do -- confirm.
        doc.maruku_error "Stream finished while reading equation\n\n"+
          doc.add_tabs(math,1,'$> '), src, con
        break
      end
      line = src.shift_line
      if line =~ EquationEnd
        math += $1 + "\n"
        label = $2 if $2
        break
      else
        math += line + "\n"
      end
    end
    con.push doc.md_equation(math, label)
  end
  true
end


# This adds support for \eqref
RegEqrefLatex = /\\eqref\{(\w+)\}/
RegEqPar = /\(eq:(\w+)\)/
RegEqref = Regexp::union(RegEqrefLatex, RegEqPar)

MaRuKu::In::Markdown.register_span_extension(:chars => [?\\, ?(], :regexp => RegEqref) do |doc, src, con|
  # Decline (like the inline-math extension does) instead of failing on nil
  # if the text cannot be read after all.
  if m = src.read_regexp(RegEqref)
    eqid = m.captures.compact.first
    con.push doc.md_el(:eqref, [], {:eqid=>eqid})
    true
  else
    false
  end
end

# ---- lib/maruku/ext/math/to_html.rb -----------------------------------------
# (diff --git a/lib/maruku/ext/math/to_html.rb b/lib/maruku/ext/math/to_html.rb
#  new file mode 100644, index 00000000..5bc3fcab, @@ -0,0 +1,107 @@)

=begin maruku_doc
Attribute: html_math_engine
Scope: document, element
Output: html
Summary: Select the rendering engine for math.
Default: <?mrk Globals[:html_math_engine].to_s ?>

Select the rendering engine for math.

If you want to use your engine `foo`, then set:

    HTML math engine: foo
{:lang=markdown}

and then implement two functions:

    def to_html_inline_math_foo
      # You can: either return a REXML::Element
      #    return Element.new 'div'
      # or return an empty array on error
      #    return []
      # or have a string parsed by REXML:
      tex = self.math.gsub('&', '&amp;').gsub('<', '&lt;')
      mathml = "<code>#{tex}</code>"
      return Document.new(mathml).root
    end

    def to_html_equation_foo
      # same thing
      ...
    end
{:lang=ruby}

=end

module MaRuKu; module Out; module HTML

  # Renders an :inline_math element with the engine selected by the
  # :html_math_engine setting, falling back to the TeX source when the
  # engine returns nil.
  def to_html_inline_math
    s = get_setting(:html_math_engine)
    method = "to_html_inline_math_#{s}".to_sym
    if self.respond_to? method
      # Parentheses are required: `self.send method || x` is parsed as
      # `self.send(method || x)`, which never reaches the fallback.
      self.send(method) || to_html_equation_none
    else
      puts "A method called #{method} should be defined."
      return []
    end
  end

  # Appends CSS class +cl+ to the 'class' attribute of REXML element +el+.
  def add_class_to(el, cl)
    el.attributes['class'] =
      if already = el.attributes['class']
        already + " " + cl
      else
        cl
      end
  end

  # Renders an :equation element as
  #   <div class="maruku-equation" [id="eq:LABEL"]>
  #     [<span class="maruku-eq-number">(N)</span>]
  #     ...engine output...
  #     <div class="maruku-eq-tex"><code style="display: none">TeX</code></div>
  #   </div>
  def to_html_equation
    s = get_setting(:html_math_engine)
    method = "to_html_equation_#{s}".to_sym
    if self.respond_to? method
      mathml = self.send(method) || to_html_equation_none
      div = create_html_element 'div'
      add_class_to(div, 'maruku-equation')
      if self.label # then numerate
        span = Element.new 'span'
        span.attributes['class'] = 'maruku-eq-number'
        num = self.num
        span << Text.new("(#{num})")
        div << span
        div.attributes['id'] = "eq:#{self.label}"
      end
      div << mathml

      # Keep the TeX source in the page, hidden.
      source_div = Element.new 'div'
      add_class_to(source_div, 'maruku-eq-tex')
      code = to_html_equation_none
      code.attributes['style'] = 'display: none'
      source_div << code
      div << source_div
      div
    else
      puts "A method called #{method} should be defined."
      return []
    end
  end

  # Renders a reference to a labelled equation as a link showing its number.
  def to_html_eqref
    # eqid2eq is only created when the first labelled equation is parsed, so
    # it is nil in a document that has references but no labels.
    equations = self.doc.eqid2eq || {}
    if eq = equations[self.eqid]
      num = eq.num
      a = Element.new 'a'
      a.attributes['class'] = 'maruku-eqref'
      a.attributes['href'] = "#eq:#{self.eqid}"
      a << Text.new("(#{num})")
      a
    else
      maruku_error "Cannot find equation #{self.eqid.inspect}"
      Text.new "(#{self.eqid})"
    end
  end


end end end


# ---- lib/maruku/ext/math/to_latex.rb ----------------------------------------
# (diff --git a/lib/maruku/ext/math/to_latex.rb b/lib/maruku/ext/math/to_latex.rb
#  new file mode 100644, index 00000000..578eb713, @@ -0,0 +1,21 @@)

module MaRuKu; module Out; module Latex

  def to_latex_inline_math
    "$#{self.math.strip}$"
  end

  # Labelled equations become a numbered `equation` environment, unlabelled
  # ones an unnumbered `displaymath`.
  def to_latex_equation
    if self.label
      l = "\\label{#{self.label}}"
      "\\begin{equation}\n#{self.math.strip}\n#{l}\\end{equation}\n"
    else
      "\\begin{displaymath}\n#{self.math.strip}\n\\end{displaymath}\n"
    end
  end

  def to_latex_eqref
    "\\eqref{#{self.eqid}}"
  end

end end end

# diff --git a/lib/maruku/helpers.rb b/lib/maruku/helpers.rb
# new file mode 100644
# index 00000000..63defabb
# --- /dev/null
# +++ b/lib/maruku/helpers.rb
# @@ -0,0 +1,259 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
+# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + + + + +# A series of helper functions for creating elements: they hide the +# particular internal representation. +# +# Please, always use these instead of creating MDElement. +# + +module MaRuKu +module Helpers + + # if the first is a md_ial, it is used as such + def md_el(node_type, children=[], meta={}, al=nil) + if (e=children.first).kind_of?(MDElement) and + e.node_type == :ial then + if al + al += e.ial + else + al = e.ial + end + children.shift + end + e = MDElement.new(node_type, children, meta, al) + e.doc = @doc + return e + end + + def md_header(level, children, al=nil) + md_el(:header, children, {:level => level}, al) + end + + # Inline code + def md_code(code, al=nil) + md_el(:inline_code, [], {:raw_code => code}, al) + end + + # Code block + def md_codeblock(source, al=nil) + md_el(:code, [], {:raw_code => source}, al) + end + + def md_quote(children, al=nil) + md_el(:quote, children, {}, al) + end + + def md_li(children, want_my_par, al=nil) + md_el(:li, children, {:want_my_paragraph=>want_my_par}, al) + end + + def md_footnote(footnote_id, children, al=nil) + md_el(:footnote, children, {:footnote_id=>footnote_id}, al) + end + + def md_abbr_def(abbr, text, al=nil) + md_el(:abbr_def, [], {:abbr=>abbr, :text=>text}, al) + end + + def md_abbr(abbr, title) + 
md_el(:abbr, [abbr], {:title=>title}) + end + + def md_html(raw_html, al=nil) + e = md_el(:raw_html, [], {:raw_html=>raw_html}) + begin + # remove newlines and whitespace at begin + # end end of string, or else REXML gets confused + raw_html = raw_html.gsub(/\A\s*</,'<'). + gsub(/>[\s\n]*\Z/,'>') + + raw_html = "<marukuwrap>#{raw_html}</marukuwrap>" + e.instance_variable_set :@parsed_html, + REXML::Document.new(raw_html) + rescue +# tell_user "Malformed block of HTML:\n"+ +# add_tabs(raw_html,1,'|') +# " #{raw_html.inspect}\n\n"+ex.inspect + end + e + end + + def md_link(children, ref_id, al=nil) + md_el(:link, children, {:ref_id=>ref_id.downcase}, al) + end + + def md_im_link(children, url, title=nil, al=nil) + md_el(:im_link, children, {:url=>url,:title=>title}, al) + end + + def md_image(children, ref_id, al=nil) + md_el(:image, children, {:ref_id=>ref_id}, al) + end + + def md_im_image(children, url, title=nil, al=nil) + md_el(:im_image, children, {:url=>url,:title=>title},al) + end + + def md_em(children, al=nil) + md_el(:emphasis, [children].flatten, {}, al) + end + + def md_br() + md_el(:linebreak, [], {}, nil) + end + + def md_hrule() + md_el(:hrule, [], {}, nil) + end + + def md_strong(children, al=nil) + md_el(:strong, [children].flatten, {}, al) + end + + def md_emstrong(children, al=nil) + md_strong(md_em(children), al) + end + + # <http://www.example.com/> + def md_url(url, al=nil) + md_el(:immediate_link, [], {:url=>url}, al) + end + + # <andrea@rubyforge.org> + # <mailto:andrea@rubyforge.org> + def md_email(email, al=nil) + md_el(:email_address, [], {:email=>email}, al) + end + + def md_entity(entity_name, al=nil) + md_el(:entity, [], {:entity_name=>entity_name}, al) + end + + # Markdown extra + def md_foot_ref(ref_id, al=nil) + md_el(:footnote_reference, [], {:footnote_id=>ref_id}, al) + end + + def md_par(children, al=nil) + md_el(:paragraph, children, meta={}, al) + end + + # [1]: http://url [properties] + def md_ref_def(ref_id, url, title=nil, 
meta={}, al=nil) + meta[:url] = url + meta[:ref_id] = ref_id + meta[:title] = title if title + md_el(:ref_definition, [], meta, al) + end + + # inline attribute list + def md_ial(al) + al = Maruku::AttributeList.new(al) if + not al.kind_of?Maruku::AttributeList + md_el(:ial, [], {:ial=>al}) + end + + # Attribute list definition + def md_ald(id, al) + md_el(:ald, [], {:ald_id=>id,:ald=>al}) + end + + # Server directive <?target code... ?> + def md_xml_instr(target, code) + md_el(:xml_instr, [], {:target=>target, :code=>code}) + end + +end +end + +module MaRuKu + +class MDElement + # outputs abbreviated form (this should be eval()uable to get the document) + def inspect2 + s = + case @node_type + when :paragraph + "md_par(%s)" % children_inspect + when :footnote_reference + "md_foot_ref(%s)" % self.footnote_id.inspect + when :entity + "md_entity(%s)" % self.entity_name.inspect + when :email_address + "md_email(%s)" % self.email.inspect + when :inline_code + "md_code(%s)" % self.raw_code.inspect + when :raw_html + "md_html(%s)" % self.raw_html.inspect + when :emphasis + "md_em(%s)" % children_inspect + when :strong + "md_strong(%s)" % children_inspect + when :immediate_link + "md_url(%s)" % self.url.inspect + when :image + "md_image(%s, %s)" % [ + children_inspect, + self.ref_id.inspect] + when :im_image + "md_im_image(%s, %s, %s)" % [ + children_inspect, + self.url.inspect, + self.title.inspect] + when :link + "md_link(%s,%s)" % [ + children_inspect, self.ref_id.inspect] + when :im_link + "md_im_link(%s, %s, %s)" % [ + children_inspect, + self.url.inspect, + self.title.inspect, + ] + when :ref_definition + "md_ref_def(%s, %s, %s)" % [ + self.ref_id.inspect, + self.url.inspect, + self.title.inspect + ] + when :ial + "md_ial(%s)" % self.ial.inspect + else + return nil + end + if @al and not @al.empty? 
then + s = s.chop + ", #{@al.inspect})" + end + s + end + +end + +end + + + + + + + diff --git a/lib/maruku/input/charsource.rb b/lib/maruku/input/charsource.rb new file mode 100644 index 00000000..6ce554fb --- /dev/null +++ b/lib/maruku/input/charsource.rb @@ -0,0 +1,325 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + + +module MaRuKu; module In; module Markdown; module SpanLevelParser + +# a string scanner coded by me +class CharSourceManual; end + +# a wrapper around StringScanner +class CharSourceStrscan; end + +# A debug scanner that checks the correctness of both +# by comparing their output +class CharSourceDebug; end + +# Choose! + +CharSource = CharSourceManual # faster! 58ms vs. 65ms +#CharSource = CharSourceStrscan +#CharSource = CharSourceDebug + + +class CharSourceManual + include MaRuKu::Strings + + def initialize(s, parent=nil) + raise "Passed #{s.class}" if not s.kind_of? String + @buffer = s + @buffer_index = 0 + @parent = parent + end + + # Return current char as a FixNum (or nil). + def cur_char; @buffer[@buffer_index] end + + # Return the next n chars as a String. + def cur_chars(n); @buffer[@buffer_index,n] end + + # Return the char after current char as a FixNum (or nil). 
+ def next_char; @buffer[@buffer_index+1] end + + def shift_char + c = @buffer[@buffer_index] + @buffer_index+=1 + c + end + + def ignore_char + @buffer_index+=1 + nil + end + + def ignore_chars(n) + @buffer_index+=n + nil + end + + def current_remaining_buffer + @buffer[@buffer_index, @buffer.size-@buffer_index] + end + + def cur_chars_are(string) + # There is a bug here + if false + r2 = /^.{#{@buffer_index}}#{Regexp.escape string}/m + @buffer =~ r2 + else + cur_chars(string.size) == string + end + end + + def next_matches(r) + r2 = /^.{#{@buffer_index}}#{r}/m + md = r2.match @buffer + return !!md + end + + def read_regexp3(r) + r2 = /^.{#{@buffer_index}}#{r}/m + m = r2.match @buffer + if m + consumed = m.to_s.size - @buffer_index +# puts "Consumed #{consumed} chars (entire is #{m.to_s.inspect})" + ignore_chars consumed + else +# puts "Could not read regexp #{r2.inspect} from buffer "+ +# " index=#{@buffer_index}" +# puts "Cur chars = #{cur_chars(20).inspect}" +# puts "Matches? = #{cur_chars(20) =~ r}" + end + m + end + + def read_regexp(r) + r2 = /^#{r}/ + rest = current_remaining_buffer + m = r2.match(rest) + if m + @buffer_index += m.to_s.size +# puts "#{r} matched #{rest.inspect}: #{m.to_s.inspect}" + end + return m + end + + def consume_whitespace + while c = cur_char + if (c == 32 || c == ?\t) +# puts "ignoring #{c}" + ignore_char + else +# puts "#{c} is not ws: "<<c + break + end + end + end + + def read_text_chars(out) + s = @buffer.size; c=nil + while @buffer_index < s && (c=@buffer[@buffer_index]) && + ((c>=?a && c<=?z) || (c>=?A && c<=?Z)) + out << c + @buffer_index += 1 + end + end + + def describe + s = describe_pos(@buffer, @buffer_index) + if @parent + s += "\n\n" + @parent.describe + end + s + end + include SpanLevelParser +end + +def describe_pos(buffer, buffer_index) + len = 75 + num_before = [len/2, buffer_index].min + num_after = [len/2, buffer.size-buffer_index].min + num_before_max = buffer_index + num_after_max = buffer.size-buffer_index + 
+# puts "num #{num_before} #{num_after}" + num_before = [num_before_max, len-num_after].min + num_after = [num_after_max, len-num_before].min +# puts "num #{num_before} #{num_after}" + + index_start = [buffer_index - num_before, 0].max + index_end = [buffer_index + num_after, buffer.size].min + + size = index_end- index_start + +# puts "- #{index_start} #{size}" + + str = buffer[index_start, size] + str.gsub!("\n",'N') + str.gsub!("\t",'T') + + if index_end == buffer.size + str += "EOF" + end + + pre_s = buffer_index-index_start + pre_s = [pre_s, 0].max + pre_s2 = [len-pre_s,0].max +# puts "pre_S = #{pre_s}" + pre =" "*(pre_s) + + "-"*len+"\n"+ + str + "\n" + + "-"*pre_s + "|" + "-"*(pre_s2)+"\n"+ +# pre + "|\n"+ + pre + "+--- Byte #{buffer_index}\n"+ + + "Shown bytes [#{index_start} to #{size}] of #{buffer.size}:\n"+ + add_tabs(buffer,1,">") + +# "CharSource: At character #{@buffer_index} of block "+ +# " beginning with:\n #{@buffer[0,50].inspect} ...\n"+ +# " before: \n ... #{cur_chars(50).inspect} ... " +end + + +require 'strscan' + +class CharSourceStrscan + include SpanLevelParser + include MaRuKu::Strings + + def initialize(s) + @s = StringScanner.new(s) + end + + # Return current char as a FixNum (or nil). + def cur_char + @s.peek(1)[0] + end + + # Return the next n chars as a String. + def cur_chars(n); + @s.peek(n) + end + + # Return the char after current char as a FixNum (or nil). 
+ def next_char; + @s.peek(2)[1] + end + + def shift_char + (@s.get_byte)[0] + end + + def ignore_char + @s.get_byte + nil + end + + def ignore_chars(n) + n.times do @s.get_byte end + nil + end + + def current_remaining_buffer + @s.rest #nil #@buffer[@buffer_index, @buffer.size-@buffer_index] + end + + def cur_chars_are(string) + cur_chars(string.size) == string + end + + def next_matches(r) + len = @s.match?(r) + return !!len + end + + def read_regexp(r) + string = @s.scan(r) + if string + return r.match(string) + else + return nil + end + end + + def consume_whitespace + @s.scan /\s+/ + nil + end + + def describe + describe_pos(@s.string, @s.pos) + end + +end + + +class CharSourceDebug + def initialize(s) + @a = CharSourceManual.new(s) + @b = CharSourceStrscan.new(s) + end + + def method_missing(methodname, *args) + a_bef = @a.describe + b_bef = @b.describe + + a = @a.send(methodname, *args) + b = @b.send(methodname, *args) + +# if methodname == :describe +# return a +# end + + if a.kind_of? 
MatchData + if a.to_a != b.to_a + puts "called: #{methodname}(#{args})" + puts "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}" + puts "AFTER: "+@a.describe + puts "AFTER: "+@b.describe + puts "BEFORE: "+a_bef + puts "BEFORE: "+b_bef + puts caller.join("\n") + exit + end + else + if a!=b + puts "called: #{methodname}(#{args})" + puts "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}" + puts ""+@a.describe + puts ""+@b.describe + puts caller.join("\n") + exit + end + end + + if @a.cur_char != @b.cur_char + puts "Fuori sincronia dopo #{methodname}(#{args})" + puts ""+@a.describe + puts ""+@b.describe + exit + end + + return a + end +end + +end end end end diff --git a/lib/maruku/input/extensions.rb b/lib/maruku/input/extensions.rb new file mode 100644 index 00000000..b8110b00 --- /dev/null +++ b/lib/maruku/input/extensions.rb @@ -0,0 +1,68 @@ +module MaRuKu; module In; module Markdown + + + # Hash Fixnum -> name + SpanExtensionsTrigger = {} + + + class SpanExtension + # trigging chars + attr_accessor :chars + # trigging regexp + attr_accessor :regexp + # lambda + attr_accessor :block + end + + # Hash String -> Extension + SpanExtensions = {} + + def check_span_extensions(src, con) + c = src.cur_char + if extensions = SpanExtensionsTrigger[c] + extensions.each do |e| + if e.regexp && (match = src.next_matches(e.regexp)) + return true if e.block.call(doc, src, con) + end + end + end + return false # not special + end + + def self.register_span_extension(args, &block) + e = SpanExtension.new + e.chars = [*args[:chars]] + e.regexp = args[:regexp] + e.block = block + e.chars.each do |c| + (SpanExtensionsTrigger[c] ||= []).push e + end + end + + def self.register_block_extension(args, &block) + regexp = args[:regexp] + BlockExtensions[regexp] = block + end + + # Hash Regexp -> Block + BlockExtensions = {} + + def check_block_extensions(src, con, line) + BlockExtensions.each do |reg, block| + if m = reg.match(line) + block = BlockExtensions[reg] + return true if 
block.call(doc, src, con) + end + end + return false # not special + end + + def any_matching_block_extension?(line) + BlockExtensions.each_key do |reg| + m = reg.match(line) + return m if m + end + return false + end + +end end end diff --git a/lib/maruku/input/html_helper.rb b/lib/maruku/input/html_helper.rb new file mode 100644 index 00000000..4275d90f --- /dev/null +++ b/lib/maruku/input/html_helper.rb @@ -0,0 +1,144 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + + +module MaRuKu; module In; module Markdown; module SpanLevelParser + +# This class helps me read and sanitize HTML blocks + +# I tried to do this with REXML, but wasn't able to. (suggestions?) + + class HTMLHelper + include MaRuKu::Strings + + Tag = %r{^<(/)?(\w+)\s*([^>]*)>}m + EverythingElse = %r{^[^<]+}m + CommentStart = %r{^<!--}x + CommentEnd = %r{^.*-->} + TO_SANITIZE = ['img','hr'] + +# attr_accessor :inside_comment + attr_reader :rest + + def initialize + @rest = "" + @tag_stack = [] + @m = nil + @already = "" + @inside_comment = false + end + + def eat_this(line) + @rest = line + @rest + things_read = 0 + until @rest.empty? 
+ if @inside_comment + if @m = CommentEnd.match(@rest) + @inside_comment = false + @already += @m.pre_match + @m.to_s + @rest = @m.post_match + elsif @m = EverythingElse.match(@rest) + @already += @m.pre_match + @m.to_s + @rest = @m.post_match + end + else + if @m = CommentStart.match(@rest) + things_read += 1 + @inside_comment = true + @already += @m.pre_match + @m.to_s + @rest = @m.post_match + elsif @m = Tag.match(@rest) + things_read += 1 + @already += @m.pre_match + @rest = @m.post_match + + is_closing = !!@m[1] + tag = @m[2] + attributes = @m[3] + + is_single = false + if attributes =~ /\A(.*)\/\Z/ + attributes = $1 + is_single = true + end + + if TO_SANITIZE.include? tag + attributes.strip! + # puts "Attributes: #{attributes.inspect}" + if attributes.size > 0 + @already += '<%s %s />' % [tag, attributes] + else + @already += '<%s />' % [tag] + end + elsif is_closing + @already += @m.to_s + if @tag_stack.empty? + error "Malformed: closing tag #{tag.inspect} "+ + "in empty list" + end + if @tag_stack.last != tag + error "Malformed: tag <#{tag}> "+ + "closes <#{@tag_stack.last}>" + end + @tag_stack.pop + elsif not is_single + @tag_stack.push tag + @already += @m.to_s + end + elsif @m = EverythingElse.match(@rest) + @already += @m.pre_match + @m.to_s + @rest = @m.post_match + else + error "Malformed HTML: not complete: #{@rest.inspect}" + end + end # not inside comment + +# puts inspect +# puts "Read: #{@tag_stack.inspect}" + break if is_finished? and things_read>0 + end + end + + + def error(s) + raise Exception, "Error: #{s} \n"+ inspect, caller + end + + def inspect; "HTML READER\n comment=#{@inside_comment} "+ + "match=#{@m.to_s.inspect}\n"+ + "Tag stack = #{@tag_stack.inspect} \n"+ + "Before:\n"+ + add_tabs(@already,1,'|')+"\n"+ + "After:\n"+ + add_tabs(@rest,1,'|')+"\n" + + end + + + def stuff_you_read + @already + end + + def is_finished? + not @inside_comment and @tag_stack.empty? 
+ end + end # html helper + +end end end end diff --git a/lib/maruku/input/linesource.rb b/lib/maruku/input/linesource.rb new file mode 100644 index 00000000..5d0a67d8 --- /dev/null +++ b/lib/maruku/input/linesource.rb @@ -0,0 +1,111 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + + +module MaRuKu; module In; module Markdown; module BlockLevelParser + +# This represents a source of lines that can be consumed. +# +# It is the twin of CharSource. +# + +class LineSource + include MaRuKu::Strings + + def initialize(lines, parent=nil, parent_offset=nil) + raise "NIL lines? 
" if not lines + @lines = lines + @lines_index = 0 + @parent = parent + @parent_offset = parent_offset + end + + def cur_line() @lines[@lines_index] end + def next_line() @lines[@lines_index+1] end + + def shift_line() + raise "Over the rainbow" if @lines_index >= @lines.size + l = @lines[@lines_index] + @lines_index += 1 + return l + end + + def ignore_line + raise "Over the rainbow" if @lines_index >= @lines.size + @lines_index += 1 + end + + def describe + #s = "At line ##{@lines_index} of #{@lines.size}:\n" + s = "At line #{original_line_number(@lines_index)}\n" + + context = 3 # lines + from = [@lines_index-context, 0].max + to = [@lines_index+context, @lines.size-1].min + + for i in from..to + prefix = (i == @lines_index) ? '--> ' : ' '; + l = @lines[i] + s += "%10s %4s|#{l}" % + [@lines[i].md_type.to_s, prefix] + + s += "|\n" + end + +# if @parent +# s << "Parent context is: \n" +# s << add_tabs(@parent.describe,1,'|') +# end + s + end + + def original_line_number(index) + if @parent + return index + @parent.original_line_number(@parent_offset) + else + 1 + index + end + end + + def cur_index + @lines_index + end + + # Returns the type of next line as a string + # breaks at first :definition + def tell_me_the_future + s = ""; num_e = 0; + for i in @lines_index..@lines.size-1 + c = case @lines[i].md_type + when :text; "t" + when :empty; num_e+=1; "e" + when :definition; "d" + else "o" + end + s += c + break if c == "d" or num_e>1 + end + s + end + +end # linesource + +end end end end # block + diff --git a/lib/maruku/input/parse_block.rb b/lib/maruku/input/parse_block.rb new file mode 100644 index 00000000..3c1d1288 --- /dev/null +++ b/lib/maruku/input/parse_block.rb @@ -0,0 +1,594 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. 
+# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + + +module MaRuKu; module In; module Markdown; module BlockLevelParser + + include Helpers + include MaRuKu::Strings + include MaRuKu::In::Markdown::SpanLevelParser + + class BlockContext < Array + def describe + n = 5 + desc = size > n ? self[-n,n] : self + "Last #{n} elements: "+ + desc.map{|x| "\n -" + x.inspect}.join + end + end + + # Splits the string and calls parse_lines_as_markdown + def parse_text_as_markdown(text) + lines = split_lines(text) + src = LineSource.new(lines) + return parse_blocks(src) + end + + # Input is a LineSource + def parse_blocks(src) + output = BlockContext.new + + # run state machine + while src.cur_line + + next if check_block_extensions(src, output, src.cur_line) + +# Prints detected type (useful for debugging) +# puts "#{src.cur_line.md_type}|#{src.cur_line}" + case src.cur_line.md_type + when :empty; + output.push :empty + src.ignore_line + when :ial + m = InlineAttributeList.match src.shift_line + content = m[1] || "" + src2 = CharSource.new(content, src) + interpret_extension(src2, output, [nil]) + when :ald + output.push read_ald(src) + when :text + if src.cur_line =~ MightBeTableHeader and + (src.next_line && src.next_line =~ TableSeparator) + output.push read_table(src) + elsif [:header1,:header2].include? 
src.next_line.md_type + output.push read_header12(src) + elsif eventually_comes_a_def_list(src) + definition = read_definition(src) + if output.last.kind_of?(MDElement) && + output.last.node_type == :definition_list then + output.last.children << definition + else + output.push md_el(:definition_list, [definition]) + end + else # Start of a paragraph + output.push read_paragraph(src) + end + when :header2, :hrule + # hrule + src.shift_line + output.push md_hrule() + when :header3 + output.push read_header3(src) + when :ulist, :olist + list_type = src.cur_line.md_type == :ulist ? :ul : :ol + li = read_list_item(src) + # append to current list if we have one + if output.last.kind_of?(MDElement) && + output.last.node_type == list_type then + output.last.children << li + else + output.push md_el(list_type, [li]) + end + when :quote; output.push read_quote(src) + when :code; e = read_code(src); output << e if e + when :raw_html; e = read_raw_html(src); output << e if e + + when :footnote_text; output.push read_footnote_text(src) + when :ref_definition; output.push read_ref_definition(src) + when :abbreviation; output.push read_abbreviation(src) + when :xml_instr; read_xml_instruction(src, output) + when :metadata; + maruku_error "Please use the new meta-data syntax: \n"+ + " http://maruku.rubyforge.org/proposal.html\n", src + src.ignore_line + else # warn if we forgot something + md_type = src.cur_line.md_type + line = src.cur_line + maruku_error "Ignoring line '#{line}' type = #{md_type}", src + src.shift_line + end + end + + merge_ial(output, src, output) + output.delete_if {|x| x.kind_of?(MDElement) && + x.node_type == :ial} + + # get rid of empty line markers + output.delete_if {|x| x == :empty} + # See for each list if we can omit the paragraphs and use li_span + # TODO: do this after + output.each do |c| + # Remove paragraphs that we can get rid of + if [:ul,:ol].include? c.node_type + if c.children.all? 
{|li| !li.want_my_paragraph} then + c.children.each do |d| + d.node_type = :li_span + d.children = d.children[0].children + end + end + end + if c.node_type == :definition_list + if c.children.all?{|defi| !defi.want_my_paragraph} then + c.children.each do |definition| + definition.definitions.each do |dd| + dd.children = dd.children[0].children + end + end + end + end + end + + output + end + + + + def read_ald(src) + if (l=src.shift_line) =~ AttributeDefinitionList + id = $1; al=$2; + al = read_attribute_list(CharSource.new(al,src), context=nil, break_on=[nil]) + self.ald[id] = al; + return md_ald(id, al) + else + maruku_error "Bug Bug:\n#{l.inspect}" + return nil + end + end + + # reads a header (with ----- or ========) + def read_header12(src) + line = src.shift_line.strip + al = nil + # Check if there is an IAL + if new_meta_data? and line =~ /^(.*)\{(.*)\}\s*$/ + line = $1.strip + ial = $2 + al = read_attribute_list(CharSource.new(ial,src), context=nil, break_on=[nil]) + end + text = parse_lines_as_span [ line ] + level = src.cur_line.md_type == :header2 ? 2 : 1; + src.shift_line + return md_header(level, text, al) + end + + # reads a header like '#### header ####' + def read_header3(src) + line = src.shift_line.strip + al = nil + # Check if there is an IAL + if new_meta_data? 
and line =~ /^(.*)\{(.*)\}\s*$/ + line = $1.strip + ial = $2 + al = read_attribute_list(CharSource.new(ial,src), context=nil, break_on=[nil]) + end + level = num_leading_hashes(line) + text = parse_lines_as_span [strip_hashes(line)] + return md_header(level, text, al) + end + + def read_xml_instruction(src, output) + m = /^\s*<\?((\w+)\s*)?(.*)$/.match src.shift_line + raise "BugBug" if not m + target = m[2] || '' + code = m[3] + until code =~ /\?>/ + code += "\n"+src.shift_line + end + if not code =~ (/\?>\s*$/) + garbage = (/\?>(.*)$/.match(code))[1] + maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n"+ + add_tabs(code, 1, '|'), src + end + code.gsub!(/\?>\s*$/, '') + + if target == 'mrk' && MaRuKu::Globals[:unsafe_features] + result = safe_execute_code(self, code) + if result + if result.kind_of? String + raise "Not expected" + else + output.push *result + end + end + else + output.push md_xml_instr(target, code) + end + end + + def read_raw_html(src) + h = HTMLHelper.new + begin + h.eat_this(l=src.shift_line) +# puts "\nBLOCK:\nhtml -> #{l.inspect}" + while src.cur_line and not h.is_finished? + l=src.shift_line +# puts "html -> #{l.inspect}" + h.eat_this "\n"+l + end + rescue Exception => e + ex = e.inspect + e.backtrace.join("\n") + maruku_error "Bad block-level HTML:\n#{add_tabs(ex,1,'|')}\n", src + end + raw_html = h.stuff_you_read + return md_html(raw_html) + end + + def read_paragraph(src) + lines = [] + while src.cur_line + # :olist does not break + case t = src.cur_line.md_type + when :quote,:header3,:empty,:raw_html,:ref_definition,:ial,:xml_instr + break + when :olist,:ulist + break if src.next_line.md_type == t + end + break if src.cur_line.strip.size == 0 + break if [:header1,:header2].include? 
src.next_line.md_type + break if any_matching_block_extension?(src.cur_line) + + lines << src.shift_line + end +# dbg_describe_ary(lines, 'PAR') + children = parse_lines_as_span(lines, src) + + return md_par(children) + end + + # Reads one list item, either ordered or unordered. + def read_list_item(src) + parent_offset = src.cur_index + + item_type = src.cur_line.md_type + first = src.shift_line + + # Ugly things going on inside `read_indented_content` + indentation = spaces_before_first_char(first) + break_list = [:ulist, :olist, :ial] + lines, want_my_paragraph = + read_indented_content(src,indentation, break_list, item_type) + + # add first line + # Strip first '*', '-', '+' from first line + stripped = first[indentation, first.size-1] + lines.unshift stripped + + #dbg_describe_ary(lines, 'LIST ITEM ') + + src2 = LineSource.new(lines, src, parent_offset) + children = parse_blocks(src2) + with_par = want_my_paragraph || (children.size>1) + + return md_li(children, with_par) + end + + def read_abbreviation(src) + if not (l=src.shift_line) =~ Abbreviation + maruku_error "Bug: it's Andrea's fault. Tell him.\n#{l.inspect}" + end + + abbr = $1 + desc = $2 + + if (not abbr) or (abbr.size==0) + maruku_error "Bad abbrev. 
abbr=#{abbr.inspect} desc=#{desc.inspect}" + end + + self.abbreviations[abbr] = desc + + return md_abbr_def(abbr, desc) + end + + def read_footnote_text(src) + parent_offset = src.cur_index + + first = src.shift_line + + if not first =~ FootnoteText + maruku_error "Bug (it's Andrea's fault)" + end + + id = $1 + text = $2 + + # Ugly things going on inside `read_indented_content` + indentation = 4 #first.size-text.size + +# puts "id =_#{id}_; text=_#{text}_ indent=#{indentation}" + + break_list = [:footnote_text] + item_type = :footnote_text + lines, want_my_paragraph = + read_indented_content(src,indentation, break_list, item_type) + + # add first line + if text && text.strip != "" then lines.unshift text end + +# dbg_describe_ary(lines, 'FOOTNOTE') + src2 = LineSource.new(lines, src, parent_offset) + children = parse_blocks(src2) + + e = md_footnote(id, children) + self.footnotes[id] = e + return e + end + + + # This is the only ugly function in the code base. + # It is used to read list items, descriptions, footnote text + def read_indented_content(src, indentation, break_list, item_type) + lines =[] + # collect all indented lines + saw_empty = false; saw_anything_after = false + while src.cur_line + #puts "#{src.cur_line.md_type} #{src.cur_line.inspect}" + if src.cur_line.md_type == :empty + saw_empty = true + lines << src.shift_line + next + end + + # after a white line + if saw_empty + # we expect things to be properly aligned + if (ns=number_of_leading_spaces(src.cur_line)) < indentation + #puts "breaking for spaces, only #{ns}: #{src.cur_line}" + break + end + saw_anything_after = true + else + break if break_list.include? src.cur_line.md_type +# break if src.cur_line.md_type != :text + end + + + stripped = strip_indent(src.shift_line, indentation) + lines << stripped + + #puts "Accepted as #{stripped.inspect}" + + # You are only required to indent the first line of + # a child paragraph. 
+ if stripped.md_type == :text + while src.cur_line && (src.cur_line.md_type == :text) + lines << strip_indent(src.shift_line, indentation) + end + end + end + + want_my_paragraph = saw_anything_after || + (saw_empty && (src.cur_line && (src.cur_line.md_type == item_type))) + +# dbg_describe_ary(lines, 'LI') + # create a new context + + while lines.last && (lines.last.md_type == :empty) + lines.pop + end + + return lines, want_my_paragraph + end + + + def read_quote(src) + parent_offset = src.cur_index + + lines = [] + # collect all indented lines + while src.cur_line && src.cur_line.md_type == :quote + lines << unquote(src.shift_line) + end +# dbg_describe_ary(lines, 'QUOTE') + + src2 = LineSource.new(lines, src, parent_offset) + children = parse_blocks(src2) + return md_quote(children) + end + + def read_code(src) + # collect all indented lines + lines = [] + while src.cur_line && ([:code, :empty].include? src.cur_line.md_type) + lines << strip_indent(src.shift_line, 4) + end + + #while lines.last && (lines.last.md_type == :empty ) + while lines.last && lines.last.strip.size == 0 + lines.pop + end + + while lines.first && lines.first.strip.size == 0 + lines.shift + end + + return nil if lines.empty? + + source = lines.join("\n") + +# dbg_describe_ary(lines, 'CODE') + + return md_codeblock(source) + end + + # Reads a series of metadata lines with empty lines in between + def read_metadata(src) + hash = {} + while src.cur_line + case src.cur_line.md_type + when :empty; src.shift_line + when :metadata; hash.merge! parse_metadata(src.shift_line) + else break + end + end + hash + end + + + def read_ref_definition(src) + line = src.shift_line + + # if link is incomplete, shift next line + if src.cur_line && (src.cur_line.md_type != :ref_definition) && + ([1,2,3].include? 
number_of_leading_spaces(src.cur_line) ) + line += " "+ src.shift_line + end + +# puts "total= #{line}" + + match = LinkRegex.match(line) + if not match + error "Link does not respect format: '#{line}'" + end + + id = match[1]; url = match[2]; title = match[3]; + id = id.strip.downcase + + hash = self.refs[id] = {:url=>url,:title=>title} + + stuff=match[4] + + if stuff + stuff.split.each do |couple| +# puts "found #{couple}" + k, v = couple.split('=') + v ||= "" + if v[0,1]=='"' then v = v[1, v.size-2] end +# puts "key:_#{k}_ value=_#{v}_" + hash[k.to_sym] = v + end + end +# puts hash.inspect + + return md_ref_def(id, url, meta={:title=>title}) + end + + def read_table(src) + + def split_cells(s) + s.strip.split('|').select{|x|x.strip.size>0}.map{|x|x.strip} + end + + head = split_cells(src.shift_line).map{|s| md_el(:head_cell, parse_lines_as_span([s])) } + + separator=split_cells(src.shift_line) + + align = separator.map { |s| s =~ Sep + if $1 and $2 then :center elsif $2 then :right else :left end } + + num_columns = align.size + + if head.size != num_columns + maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}" + tell_user "I will ignore this table." + # XXX try to recover + return md_br() + end + + rows = [] + + while src.cur_line && src.cur_line =~ /\|/ + row = split_cells(src.shift_line).map{|s| + md_el(:cell, parse_lines_as_span([s]))} + if head.size != num_columns + maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}" + tell_user "I will ignore this table." 
+ # XXX try to recover + return md_br() + end + rows << row + end + + children = (head+rows).flatten + return md_el(:table, children, {:align => align}) + end + + # If current line is text, a definition list is coming + # if 1) text,empty,[text,empty]*,definition + + def eventually_comes_a_def_list(src) + future = src.tell_me_the_future + ok = future =~ %r{^t+e?d}x +# puts "future: #{future} - #{ok}" + ok + end + + + def read_definition(src) + # Read one or more terms + terms = [] + while src.cur_line && src.cur_line.md_type == :text + terms << md_el(:definition_term, parse_lines_as_span([src.shift_line])) + end +# dbg_describe_ary(terms, 'DT') + + want_my_paragraph = false + + raise "Chunky Bacon!" if not src.cur_line + + # one optional empty + if src.cur_line.md_type == :empty + want_my_paragraph = true + src.shift_line + end + + raise "Chunky Bacon!" if src.cur_line.md_type != :definition + + # Read one or more definitions + definitions = [] + while src.cur_line && src.cur_line.md_type == :definition + parent_offset = src.cur_index + + first = src.shift_line + first =~ Definition + first = $1 + + # I know, it's ugly!!! + + lines, w_m_p = + read_indented_content(src,4, [:definition], :definition) + want_my_paragraph ||= w_m_p + + lines.unshift first + +# dbg_describe_ary(lines, 'DD') + src2 = LineSource.new(lines, src, parent_offset) + children = parse_blocks(src2) + definitions << md_el(:definition_data, children) + end + + return md_el(:definition, terms+definitions, { + :terms => terms, + :definitions => definitions, + :want_my_paragraph => want_my_paragraph}) + end +end # BlockLevelParser +end # MaRuKu +end +end \ No newline at end of file diff --git a/lib/maruku/input/parse_doc.rb b/lib/maruku/input/parse_doc.rb new file mode 100644 index 00000000..07c6bfed --- /dev/null +++ b/lib/maruku/input/parse_doc.rb @@ -0,0 +1,225 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. 
+# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + + +require 'iconv' + + +module MaRuKu; module In; module Markdown; module BlockLevelParser + + def parse_doc(s) + + meta2 = parse_email_headers(s) + data = meta2[:data] + meta2.delete :data + + self.attributes.merge! meta2 + +=begin maruku_doc +Attribute: encoding +Scope: document +Summary: Encoding for the document. + +If the `encoding` attribute is specified, then the content +will be converted from the specified encoding to UTF-8. + +Conversion happens using the `iconv` library. +=end + + enc = self.attributes[:encoding] + self.attributes.delete :encoding + if enc && enc.downcase != 'utf-8' + converted = Iconv.new('utf-8', enc).iconv(data) + +# puts "Data: #{data.inspect}: #{data}" +# puts "Conv: #{converted.inspect}: #{converted}" + + data = converted + end + + @children = parse_text_as_markdown(data) + + if true #markdown_extra? 
+ self.search_abbreviations + self.substitute_markdown_inside_raw_html + end + + toc = create_toc + + # use title if not set + if not self.attributes[:title] and toc.header_element + title = toc.header_element.to_s + self.attributes[:title] = title +# puts "Set document title to #{title}" + end + + # save for later use + self.toc = toc + + # Now do the attributes magic + each_element do |e| + # default attribute list + if default = self.ald[e.node_type.to_s] + expand_attribute_list(default, e.attributes) + end + expand_attribute_list(e.al, e.attributes) +# puts "#{e.node_type}: #{e.attributes.inspect}" + end + +=begin maruku_doc +Attribute: unsafe_features +Scope: global +Summary: Enables execution of XML instructions. + +Disabled by default because of security concerns. +=end + + if Maruku::Globals[:unsafe_features] + self.execute_code_blocks + # TODO: remove executed code blocks + end + end + + # Expands an attribute list in an Hash + def expand_attribute_list(al, result) + al.each do |k, v| + case k + when :class + if not result[:class] + result[:class] = v + else + result[:class] += " " + v + end + when :id; result[:id] = v + when :ref; + if self.ald[v] + already = (result[:expanded_references] ||= []) + if not already.include?(v) + already.push v + expand_attribute_list(self.ald[v], result) + else + already.push v + maruku_error "Circular reference between labels.\n\n"+ + "Label #{v.inspect} calls itself via recursion.\nThe recursion is "+ + (already.map{|x| x.inspect}.join(' => ')) + end + else + if not result[:unresolved_references] + result[:unresolved_references] = v + else + result[:unresolved_references] << " #{v}" + end + + result[v.to_sym] = true + end + else + result[k.to_sym]=v + end + end + end + + def safe_execute_code(object, code) + begin + return object.instance_eval(code) + rescue Exception => e + maruku_error "Exception while executing this:\n"+ + add_tabs(code, 1, ">")+ + "\nThe error was:\n"+ + add_tabs(e.inspect+"\n"+e.caller.join("\n"), 1, 
"|") + rescue RuntimeError => e + maruku_error "2: Exception while executing this:\n"+ + add_tabs(code, 1, ">")+ + "\nThe error was:\n"+ + add_tabs(e.inspect, 1, "|") + rescue SyntaxError => e + maruku_error "2: Exception while executing this:\n"+ + add_tabs(code, 1, ">")+ + "\nThe error was:\n"+ + add_tabs(e.inspect, 1, "|") + end + nil + end + + def execute_code_blocks + self.each_element(:xml_instr) do |e| + if e.target == 'maruku' + result = safe_execute_code(e, e.code) + if result.kind_of?(String) + puts "Result is : #{result.inspect}" + end + end + end + end + + def search_abbreviations + self.abbreviations.each do |abbrev, title| + reg = Regexp.new(Regexp.escape(abbrev)) + self.replace_each_string do |s| + if m = reg.match(s) + e = md_abbr(abbrev.dup, title ? title.dup : nil) + [m.pre_match, e, m.post_match] + else + s + end + end + end + end + + include REXML + # (PHP Markdown extra) Search for elements that have + # markdown=1 or markdown=block defined + def substitute_markdown_inside_raw_html + self.each_element(:raw_html) do |e| + doc = e.instance_variable_get :@parsed_html + if doc # valid html + # parse block-level markdown elements in these HTML tags + block_tags = ['div'] + + # use xpath to find elements with 'markdown' attribute + XPath.match(doc, "//*[attribute::markdown]" ).each do |e| +# puts "Found #{e}" + # should we parse block-level or span-level? + parse_blocks = (e.attributes['markdown'] == 'block') || + block_tags.include?(e.name) + # remove 'markdown' attribute + e.delete_attribute 'markdown' + # Select all text elements of e + XPath.match(e, "//text()" ).each { |original_text| + s = original_text.value.strip + if s.size > 0 + el = md_el(:dummy, + parse_blocks ? 
parse_text_as_markdown(s) : + parse_lines_as_span([s]) ) + p = original_text.parent + el.children_to_html.each do |x| + p.insert_before(original_text, x) + end + p.delete(original_text) + + end + } + + end + + end + end + end + +end end end end diff --git a/lib/maruku/input/parse_span_better.rb b/lib/maruku/input/parse_span_better.rb new file mode 100644 index 00000000..211d7683 --- /dev/null +++ b/lib/maruku/input/parse_span_better.rb @@ -0,0 +1,692 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + + +require 'set' + +module MaRuKu; module In; module Markdown; module SpanLevelParser + include MaRuKu::Helpers + + EscapedCharInText = + Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>] + + EscapedCharInQuotes = + Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>,?',?"] + + EscapedCharInInlineCode = [?\\,?`] + + def parse_lines_as_span(lines, parent=nil) + parse_span_better lines.join("\n"), parent + end + + def parse_span_better(string, parent=nil) + if not string.kind_of? String then + error "Passed #{string.class}." 
end + + st = (string + "") + st.freeze + src = CharSource.new(st, parent) + read_span(src, EscapedCharInText, [nil]) + end + + # This is the main loop for reading span elements + # + # It's long, but not *complex* or difficult to understand. + # + # + def read_span(src, escaped, exit_on_chars, exit_on_strings=nil) + con = SpanContext.new + c = d = nil + while true + c = src.cur_char + + # This is only an optimization which cuts 50% of the time used. + # (but you can't use a-zA-z in exit_on_chars) + if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z))) + con.cur_string << src.shift_char + next + end + + break if exit_on_chars && exit_on_chars.include?(c) + break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x} + + # check if there are extensions + if check_span_extensions(src, con) + next + end + + case c = src.cur_char + when ?\ # it's space (32) + if src.cur_chars_are " \n" + src.ignore_chars(3) + con.push_element md_br() + next + else + src.ignore_char + con.push_space + end + when ?\n, ?\t + src.ignore_char + con.push_space + when ?` + read_inline_code(src,con) + when ?< + # It could be: + # 1) HTML "<div ..." + # 2) HTML "<!-- ..." + # 3) url "<http:// ", "<ftp:// ..." + # 4) email "<andrea@... ", "<mailto:andrea@..." + # 5) on itself! "a < b " + # 6) Start of <<guillemettes>> + + case d = src.next_char + when ?<; # guillemettes + src.ignore_chars(2) + con.push_char ?< + con.push_char ?< + when ?!; + if src.cur_chars_are '<!--' + read_inline_html(src, con) + else + con.push_char src.shift_char + end + when ?? 
+ read_xml_instr_span(src, con) + when ?\ , ?\t + con.push_char src.shift_char + else + if src.next_matches(/<mailto:/) or + src.next_matches(/<[\w\.]+\@/) + read_email_el(src, con) + elsif src.next_matches(/<\w+:/) + read_url_el(src, con) + elsif src.next_matches(/<\w/) + #puts "This is HTML: #{src.cur_chars(20)}" + read_inline_html(src, con) + else + #puts "This is NOT HTML: #{src.cur_chars(20)}" + con.push_char src.shift_char + end + end + when ?\\ + d = src.next_char + if d == ?' + src.ignore_chars(2) + con.push_element md_entity('apos') + elsif d == ?" + src.ignore_chars(2) + con.push_element md_entity('quot') + elsif escaped.include? d + src.ignore_chars(2) + con.push_char d + else + con.push_char src.shift_char + end + when ?[ + if markdown_extra? && src.next_char == ?^ + read_footnote_ref(src,con) + else + read_link(src, con) + end + when ?! + if src.next_char == ?[ + read_image(src, con) + else + con.push_char src.shift_char + end + when ?& + if m = src.read_regexp(/\&([\w\d]+);/) + con.push_element md_entity(m[1]) + else + con.push_char src.shift_char + end + when ?* + if not src.next_char + maruku_error "Opening * as last char.", src, con + maruku_recover "Threating as literal" + con.push_char src.shift_char + else + follows = src.cur_chars(4) + if follows =~ /^\*\*\*[^\s\*]/ + con.push_element read_emstrong(src,'***') + elsif follows =~ /^\*\*[^\s\*]/ + con.push_element read_strong(src,'**') + elsif follows =~ /^\*[^\s\*]/ + con.push_element read_em(src,'*') + else # * is just a normal char + con.push_char src.shift_char + end + end + when ?_ + if not src.next_char + maruku_error "Opening _ as last char", src, con + maruku_recover "Threating as literal", src, con + con.push_char src.shift_char + else + follows = src.cur_chars(4) + if follows =~ /^\_\_\_[^\s\_]/ + con.push_element read_emstrong(src,'___') + elsif follows =~ /^\_\_[^\s\_]/ + con.push_element read_strong(src,'__') + elsif follows =~ /^\_[^\s\_]/ + con.push_element read_em(src,'_') + else # 
_ is just a normal char + con.push_char src.shift_char + end + end + when ?{ # extension + src.ignore_char # { + interpret_extension(src, con, [?}]) + src.ignore_char # } + when nil + maruku_error ("Unclosed span (waiting for %s"+ + "#{exit_on_strings.inspect})") % [ + exit_on_chars ? "#{exit_on_chars.inspect} or" : ""], + src,con + break + else # normal text + con.push_char src.shift_char + end # end case + end # end while true + con.push_string_if_present + + # Assign IAL to elements + merge_ial(con.elements, src, con) + + + # Remove leading space + if (s = con.elements.first).kind_of? String + if s[0] == ?\ then con.elements[0] = s[1, s.size-1] end + con.elements.shift if s.size == 0 + end + + # Remove final spaces + if (s = con.elements.last).kind_of? String + s.chop! if s[-1] == ?\ + con.elements.pop if s.size == 0 + end + + educated = educate(con.elements) + + educated + end + + + def read_xml_instr_span(src, con) + src.ignore_chars(2) # starting <? + + # read target <?target code... ?> + target = if m = src.read_regexp(/(\w+)/) + m[1] + else + '' + end + + delim = "?>" + + code = + read_simple(src, escaped=[], break_on_chars=[], + break_on_strings=[delim]) + + src.ignore_chars delim.size + + code = (code || "").strip + con.push_element md_xml_instr(target, code) + end + + # Start: cursor on character **after** '{' + # End: curson on '}' or EOF + def interpret_extension(src, con, break_on_chars) + case src.cur_char + when ?: + src.ignore_char # : + extension_meta(src, con, break_on_chars) + when ?#, ?. 
+ extension_meta(src, con, break_on_chars) + else + stuff = read_simple(src, escaped=[?}], break_on_chars, []) + if stuff =~ /^(\w+\s|[^\w])/ + extension_id = $1.strip + if false + else + maruku_recover "I don't know what to do with extension '#{extension_id}'\n"+ + "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con + extension_meta(src, con, break_on_chars) + end + else + maruku_recover "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con + extension_meta(src, con, break_on_chars) + end + end + end + + def extension_meta(src, con, break_on_chars) + if m = src.read_regexp(/(\w)+\:/) + name = m[1] + content = m[2] + al = read_attribute_list(src, con, break_on_chars) + self.doc.ald[name] = al + con.push md_ald(name, al) + else + al = read_attribute_list(src, con, break_on_chars) + self.doc.ald[name] = al + con.push md_ial(al) + end + end + + def read_url_el(src,con) + src.ignore_char # leading < + url = read_simple(src, [], [?>]) + src.ignore_char # closing > + + con.push_element md_url(url) + end + + def read_email_el(src,con) + src.ignore_char # leading < + mail = read_simple(src, [], [?>]) + src.ignore_char # closing > + + address = mail.gsub(/^mailto:/,'') + con.push_element md_email(address) + end + + def read_url(src, break_on) + if [?',?"].include? src.cur_char + error 'Invalid char for url', src + end + + url = read_simple(src, [], break_on) + if not url # empty url + url = "" + end + + if url[0] == ?< && url[-1] == ?> + url = url[1, url.size-2] + end + + if url.size == 0 + return nil + end + + url + end + + + def read_quoted_or_unquoted(src, con, escaped, exit_on_chars) + case src.cur_char + when ?', ?" + read_quoted(src, con) + else + read_simple(src, escaped, exit_on_chars) + end + end + + # Tries to read a quoted value. If stream does not + # start with ' or ", returns nil. + def read_quoted(src, con) + case src.cur_char + when ?', ?" 
+ quote_char = src.shift_char # opening quote + string = read_simple(src, EscapedCharInQuotes, [quote_char]) + src.ignore_char # closing quote + return string + else +# puts "Asked to read quote from: #{src.cur_chars(10).inspect}" + return nil + end + end + + # Reads a simple string (no formatting) until one of break_on_chars, + # while escaping the escaped. + # If the string is empty, it returns nil. + # Raises on error if the string terminates unexpectedly. +# # If eat_delim is true, and if the delim is not the EOF, then the delim +# # gets eaten from the stream. + def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil) + text = "" + while true +# puts "Reading simple #{text.inspect}" + c = src.cur_char + if exit_on_chars && exit_on_chars.include?(c) +# src.ignore_char if eat_delim + break + end + + break if exit_on_strings && + exit_on_strings.any? {|x| src.cur_chars_are x} + + case c + when nil + s= "String finished while reading (break on "+ + "#{exit_on_chars.map{|x|""<<x}.inspect})"+ + " already read: #{text.inspect}" + maruku_error s, src + maruku_recover "I boldly continue", src + break + when ?\\ + d = src.next_char + if escaped.include? d + src.ignore_chars(2) + text << d + else + text << src.shift_char + end + else + text << src.shift_char + end + end +# puts "Read simple #{text.inspect}" + text.empty? ? 
nil : text + end + + def read_em(src, delim) + src.ignore_char + children = read_span(src, EscapedCharInText, nil, [delim]) + src.ignore_char + md_em(children) + end + + def read_strong(src, delim) + src.ignore_chars(2) + children = read_span(src, EscapedCharInText, nil, [delim]) + src.ignore_chars(2) + md_strong(children) + end + + def read_emstrong(src, delim) + src.ignore_chars(3) + children = read_span(src, EscapedCharInText, nil, [delim]) + src.ignore_chars(3) + md_emstrong(children) + end + + SPACE = ?\ # = 32 + +# R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/) + R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/) + + # Reads a bracketed id "[refid]". Consumes also both brackets. + def read_ref_id(src, con) + src.ignore_char # [ + src.consume_whitespace +# puts "Next: #{src.cur_chars(10).inspect}" + if m = src.read_regexp(R_REF_ID) +# puts "Got: #{m[1].inspect} Ignored: #{m[2].inspect}" +# puts "Then: #{src.cur_chars(10).inspect}" + m[1] + else + nil + end + end + + def read_footnote_ref(src,con) + ref = read_ref_id(src,con) + con.push_element md_foot_ref(ref) + end + + def read_inline_html(src, con) + h = HTMLHelper.new + begin + # This is our current buffer in the context + start = src.current_remaining_buffer + + h.eat_this start + if not h.is_finished? 
+ error "inline_html: Malformed:\n "+ + "#{start.inspect}\n #{h.inspect}",src,con + end + + consumed = start.size - h.rest.size + if consumed > 0 + con.push_element md_html(h.stuff_you_read) + src.ignore_chars(consumed) + else + puts "HTML helper did not work on #{start.inspect}" + con.push_char src.shift_char + end + rescue Exception => e + maruku_error "Bad html: \n" + + add_tabs(e.inspect+e.backtrace.join("\n"),1,'>'), + src,con + maruku_recover "I will try to continue after bad HTML.", src, con + con.push_char src.shift_char + end + end + + def read_inline_code(src, con) + # Count the number of ticks + num_ticks = 0 + while src.cur_char == ?` + num_ticks += 1 + src.ignore_char + end + # We will read until this string + end_string = "`"*num_ticks + + code = + read_simple(src, escaped=[], break_on_chars=[], + break_on_strings=[end_string]) + +# puts "Now I expects #{num_ticks} ticks: #{src.cur_chars(10).inspect}" + src.ignore_chars num_ticks + + # Ignore at most one space + if num_ticks > 1 && code[0] == SPACE + code = code[1, code.size-1] + end + + # drop last space + if num_ticks > 1 && code[-1] == SPACE + code = code[0,code.size-1] + end + +# puts "Read `` code: #{code.inspect}; after: #{src.cur_chars(10).inspect} " + con.push_element md_code(code) + end + + def read_link(src, con) + # we read the string and see what happens + src.ignore_char # opening bracket + children = read_span(src, EscapedCharInText, [?]]) + src.ignore_char # closing bracket + + # ignore space + if src.cur_char == SPACE and + (src.next_char == ?[ or src.next_char == ?( ) + src.shift_char + end + + case src.cur_char + when ?( + src.ignore_char # opening ( + src.consume_whitespace + url = read_url(src, [SPACE,?\t,?)]) + if not url + url = '' # no url is ok + end + src.consume_whitespace + title = nil + if src.cur_char != ?) 
# we have a title + quote_char = src.cur_char + title = read_quoted(src,con) + + if not title + maruku_error 'Must quote title',src,con + else + # Tries to read a title with quotes: ![a](url "ti"tle") + # this is the most ugly thing in Markdown + if not src.next_matches(/\s*\)/) + # if there is not a closing par ), then read + # the rest and guess it's title with quotes + rest = read_simple(src, escaped=[], break_on_chars=[?)], + break_on_strings=[]) + # chop the closing char + rest.chop! + title << quote_char << rest + end + end + end + src.consume_whitespace + closing = src.shift_char # closing ) + if closing != ?) + maruku_error 'Unclosed link',src,con + maruku_recover "No closing ): I will not create"+ + " the link for #{children.inspect}", src, con + con.push_elements children + return + end + con.push_element md_im_link(children,url, title) + when ?[ # link ref + ref_id = read_ref_id(src,con) + if ref_id + con.push_element md_link(children, ref_id) + else + maruku_error "Could not read ref_id", src, con + maruku_recover "I will not create the link for "+ + "#{children.inspect}", src, con + con.push_elements children + return + end + else # empty [link] + con.push_element md_link(children, "") + end + end # read link + + def read_image(src, con) + src.ignore_chars(2) # opening "![" + alt_text = read_span(src, EscapedCharInText, [?]]) + src.ignore_char # closing bracket + # ignore space + if src.cur_char == SPACE and + (src.next_char == ?[ or src.next_char == ?( ) + src.ignore_char + end + case src.cur_char + when ?( + src.ignore_char # opening ( + src.consume_whitespace + url = read_url(src, [SPACE,?\t,?)]) + if not url + error "Could not read url from #{src.cur_chars(10).inspect}", + src,con + end + src.consume_whitespace + title = nil + if src.cur_char != ?) 
# we have a title + quote_char = src.cur_char + title = read_quoted(src,con) + if not title + maruku_error 'Must quote title',src,con + else + # Tries to read a title with quotes: ![a](url "ti"tle") + # this is the most ugly thing in Markdown + if not src.next_matches(/\s*\)/) + # if there is not a closing par ), then read + # the rest and guess it's title with quotes + rest = read_simple(src, escaped=[], break_on_chars=[?)], + break_on_strings=[]) + # chop the closing char + rest.chop! + title << quote_char << rest + end + end + end + src.consume_whitespace + closing = src.shift_char # closing ) + if closing != ?) + # closing is nil when the input ended; appending nil to a String raises + shown = closing ? (""<<closing) : "" + error "Unclosed link: '#{shown}'"+ + " Read url=#{url.inspect} title=#{title.inspect}",src,con + end + con.push_element md_im_image(alt_text, url, title) + when ?[ # link ref + ref_id = read_ref_id(src,con) + con.push_element md_image(alt_text, ref_id) + else # no stuff + # neither (url) nor [ref] follows: hand back the bracket text that was + # parsed into alt_text ("children" is not a local of this method) + con.push_elements alt_text + end + end # read link + + + class SpanContext + include MaRuKu::Strings + + # Read elements + attr_accessor :elements + attr_accessor :cur_string + + def initialize + @elements = [] + @cur_string = "" + end + + def push_element(e) + raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} " if + not (e.kind_of?(String) or e.kind_of?(MDElement)) + + push_string_if_present + @elements << e + nil + end + alias push push_element + + def push_elements(a) + for e in a + if e.kind_of? 
String + e.each_byte do |b| push_char b end + else + push_element e + end + end + end + def push_string_if_present + if @cur_string.size > 0 + @elements << @cur_string + @cur_string = "" + end + nil + end + + def push_char(c) + @cur_string << c + nil + end + + # push space into current string if + # there isn't one + def push_space + last = @cur_string[@cur_string.size-1] + @cur_string << ?\ if last != ?\ + end + + def describe + lines = @elements.map{|x| x.inspect}.join("\n") + s = "Elements read in span: \n" + + add_tabs(lines,1, ' -')+"\n" + + if @cur_string.size > 0 + s += "Current string: \n #{@cur_string.inspect}\n" + end + s + end + end # SpanContext + +end end end end # module MaRuKu; module In; module Markdown; module SpanLevelParser + diff --git a/lib/maruku/input/rubypants.rb b/lib/maruku/input/rubypants.rb new file mode 100644 index 00000000..02b52abf --- /dev/null +++ b/lib/maruku/input/rubypants.rb @@ -0,0 +1,225 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + +# +# NOTA BENE: +# +# The following algorithm is a rip-off of RubyPants written by +# Christian Neukirchen. +# +# RubyPants is a Ruby port of SmartyPants written by John Gruber. 
+# +# This file is distributed under the GPL, which I guess is compatible +# with the terms of the RubyPants license. +# +# -- Andrea Censi + + +# = RubyPants -- SmartyPants ported to Ruby +# +# Ported by Christian Neukirchen <mailto:chneukirchen@gmail.com> +# Copyright (C) 2004 Christian Neukirchen +# +# Incooporates ideas, comments and documentation by Chad Miller +# Copyright (C) 2004 Chad Miller +# +# Original SmartyPants by John Gruber +# Copyright (C) 2003 John Gruber +# + +# +# = RubyPants -- SmartyPants ported to Ruby +# +# +# [snip] +# +# == Authors +# +# John Gruber did all of the hard work of writing this software in +# Perl for Movable Type and almost all of this useful documentation. +# Chad Miller ported it to Python to use with Pyblosxom. +# +# Christian Neukirchen provided the Ruby port, as a general-purpose +# library that follows the *Cloth API. +# +# +# == Copyright and License +# +# === SmartyPants license: +# +# Copyright (c) 2003 John Gruber +# (http://daringfireball.net) +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# * Neither the name "SmartyPants" nor the names of its contributors +# may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# This software is provided by the copyright holders and contributors +# "as is" and any express or implied warranties, including, but not +# limited to, the implied warranties of merchantability and fitness +# for a particular purpose are disclaimed. 
In no event shall the +# copyright owner or contributors be liable for any direct, indirect, +# incidental, special, exemplary, or consequential damages (including, +# but not limited to, procurement of substitute goods or services; +# loss of use, data, or profits; or business interruption) however +# caused and on any theory of liability, whether in contract, strict +# liability, or tort (including negligence or otherwise) arising in +# any way out of the use of this software, even if advised of the +# possibility of such damage. +# +# === RubyPants license +# +# RubyPants is a derivative work of SmartyPants and smartypants.py. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# This software is provided by the copyright holders and contributors +# "as is" and any express or implied warranties, including, but not +# limited to, the implied warranties of merchantability and fitness +# for a particular purpose are disclaimed. In no event shall the +# copyright owner or contributors be liable for any direct, indirect, +# incidental, special, exemplary, or consequential damages (including, +# but not limited to, procurement of substitute goods or services; +# loss of use, data, or profits; or business interruption) however +# caused and on any theory of liability, whether in contract, strict +# liability, or tort (including negligence or otherwise) arising in +# any way out of the use of this software, even if advised of the +# possibility of such damage. 
+# +# +# == Links +# +# John Gruber:: http://daringfireball.net +# SmartyPants:: http://daringfireball.net/projects/smartypants +# +# Chad Miller:: http://web.chad.org +# +# Christian Neukirchen:: http://kronavita.de/chris + + +module MaRuKu; module In; module Markdown; module SpanLevelParser + Punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]' + Close_class = %![^\ \t\r\n\\[\{\(\-]! + + Rules = [ + [/---/, :mdash ], + [/--/, :ndash ], + ['...', :hellip ], + ['. . .', :hellip ], + ["``", :ldquo ], + ["''", :rdquo ], + [/<<\s/, [:laquo, :nbsp] ], + [/\s>>/, [:nbsp, :raquo] ], + [/<</, :laquo ], + [/>>/, :raquo ], + +# def educate_single_backticks(str) +# ["`", :lsquo] +# ["'", :rsquo] + + # Special case if the very first character is a quote followed by + # punctuation at a non-word-break. Close the quotes by brute + # force: + [/^'(?=#{Punct_class}\B)/, :rsquo], + [/^"(?=#{Punct_class}\B)/, :rdquo], + # Special case for double sets of quotes, e.g.: + # <p>He said, "'Quoted' words in a larger quote."</p> + [/"'(?=\w)/, [:ldquo, :lsquo] ], + [/'"(?=\w)/, [:lsquo, :ldquo] ], + # Special case for decade abbreviations (the '80s): + [/'(?=\d\ds)/, :rsquo ], + # Get most opening single quotes: + [/(\s)'(?=\w)/, [:one, :lsquo] ], + # Single closing quotes: + [/(#{Close_class})'/, [:one, :rsquo]], + [/'(\s|s\b|$)/, [:rsquo, :one]], + # Any remaining single quotes should be opening ones: + [/'/, :lsquo], + # Get most opening double quotes: + [/(\s)"(?=\w)/, [:one, :ldquo]], + # Double closing quotes: + [/(#{Close_class})"/, [:one, :rdquo]], + [/"(\s|s\b|$)/, [:rdquo, :one]], + # Any remaining quotes should be opening ones: + [/"/, :ldquo] + ]. + map{|reg, subst| # People should do the thinking, machines should do the work. + reg = Regexp.new(Regexp.escape(reg)) if not reg.kind_of? 
Regexp + subst = [subst] if not subst.kind_of?Array + [reg, subst]} + +# note: input will be destroyed +def apply_one_rule(reg, subst, input) + output = [] + while first = input.shift + if first.kind_of?(String) && (m = reg.match(first)) + output.push m. pre_match if m. pre_match.size > 0 + input.unshift m.post_match if m.post_match.size > 0 + subst.reverse.each do |x| + input.unshift( x == :one ? m[1] : md_entity(x.to_s) ) end + else + output.push first + end + end + return output +end + +def educate(elements) + Rules.each do |reg, subst| + elements = apply_one_rule(reg, subst, elements) + end + # strips empty strings + elements.delete_if {|x| x.kind_of?(String) && x.size == 0} + final = [] + # join consecutive strings + elements.each do |x| + if x.kind_of?(String) && final.last.kind_of?(String) + final.last << x + else + final << x + end + end + return final +end + +end end end end diff --git a/lib/maruku/input/type_detection.rb b/lib/maruku/input/type_detection.rb new file mode 100644 index 00000000..db0b7bbf --- /dev/null +++ b/lib/maruku/input/type_detection.rb @@ -0,0 +1,141 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + +class String + include MaRuKu::Strings + def md_type() + @md_type ||= line_md_type(self) + end +end + +class NilClass + def md_type() nil end + +end + +# This code does the classification of lines for block-level parsing. +module MaRuKu; module Strings + + def line_md_type(l) + # The order of evaluation is important (:text is a catch-all) + return :text if l =~ /^[a-zA-Z]/ + return :code if number_of_leading_spaces(l)>=4 + return :empty if l =~ /^\s*$/ + return :footnote_text if l =~ FootnoteText + return :ref_definition if l =~ LinkRegex or l=~ IncompleteLink + return :abbreviation if l =~ Abbreviation + return :definition if l =~ Definition + # I had a bug with emails and urls at the beginning of the + # line that were mistaken for raw_html + return :text if l=~EMailAddress or l=~ URL + # raw html is like PHP Markdown Extra: at most three spaces before + return :xml_instr if l =~ %r{^\s*<\?} + return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+} + return :raw_html if l =~ %r{[ ]{0,3}<\!\-\-} + return :ulist if l =~ /^\s?([\*\-\+])\s+.*\w+/ + return :olist if l =~ /^\s?\d+\..*\w+/ + return :header1 if l =~ /^(=)+/ + return :header2 if l =~ /^([-\s])+$/ + return :header3 if l =~ /^(#)+\s*\S+/ + # at least three asterisks on a line, and only whitespace + return :hrule if l =~ /^(\s*\*\s*){3,1000}$/ + return :hrule if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens + return :hrule if l =~ /^(\s*_\s*){3,1000}$/ # or underscores + return :quote if l =~ /^>/ + return :metadata if l =~ /^@/ +# if @@new_meta_data? 
+ return :ald if l =~ AttributeDefinitionList + return :ial if l =~ InlineAttributeList +# end +# return :equation_end if l =~ EquationEnd + return :text # else, it's just text + end + + + # $1 = id $2 = attribute list + AttributeDefinitionList = /^\s{0,3}\{([\w\d\s]+)\}:\s*(.*)\s*$/ + # + InlineAttributeList = /^\s{0,3}\{(.*)\}\s*$/ + # Example: + # ^:blah blah + # ^: blah blah + # ^ : blah blah + Definition = %r{ + ^ # begin of line + [ ]{0,3} # up to 3 spaces + : # colon + \s* # whitespace + (\S.*) # the text = $1 + $ # end of line + }x + + # Example: + # *[HTML]: Hyper Text Markup Language + Abbreviation = %r{ + ^ # begin of line + \* # one asterisk + \[ # opening bracket + ([^\]]+) # any non-closing bracket: id = $1 + \] # closing bracket + : # colon + \s* # whitespace + (\S.*\S)* # definition=$2 + \s* # strip this whitespace + $ # end of line + }x + + FootnoteText = %r{ + ^\s*\[(\^.+)\]: # id = $1 (including '^') + \s*(\S.*)?$ # text = $2 (not obb.) + }x + + # This regex is taken from BlueCloth sources + # Link defs are in the form: ^[id]: \n? url "optional title" + LinkRegex = %r{ + ^[ ]*\[([^\]]+)\]: # id = $1 + [ ]* + <?(\S+)>? # url = $2 + [ ]* + (?:# Titles are delimited by "quotes" or (parens). + ["('] + (.+?) # title = $3 + [")'] # Matching ) or " + \s*(.+)? # stuff = $4 + )? 
# title is optional + }x + + IncompleteLink = %r{^\s*\[(.+)\]:\s*$} + + HeaderWithId = /^(.*)\{\#([\w_-]+)\}\s*$/ + + HeaderWithAttributes = /^(.*)\{(.*)\}\s*$/ + + + # if contains a pipe, it could be a table header + MightBeTableHeader = %r{\|} + # -------------: + Sep = /\s*(\:)?\s*-+\s*(\:)?\s*/ + # | -------------:| ------------------------------ | + TableSeparator = %r{^(\|?#{Sep}\|?)+\s*$} + + + EMailAddress = /<([^:]+@[^:]+)>/ + URL = /^<http:/ +end end \ No newline at end of file diff --git a/lib/maruku/maruku.rb b/lib/maruku/maruku.rb new file mode 100644 index 00000000..0ce5545c --- /dev/null +++ b/lib/maruku/maruku.rb @@ -0,0 +1,33 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + + +# The Maruku class is the public interface + +class Maruku + + def initialize(s=nil, meta={}) + super(nil) + self.attributes.merge! meta + if s + parse_doc(s) + end + end +end diff --git a/lib/maruku/output/to_html.rb b/lib/maruku/output/to_html.rb new file mode 100644 index 00000000..8cf401d3 --- /dev/null +++ b/lib/maruku/output/to_html.rb @@ -0,0 +1,689 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. 
+# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + + +require 'rexml/document' + +require 'rubygems' +require 'syntax' +require 'syntax/convertors/html' + + +class String + # A string is rendered into HTML by creating + # a REXML::Text node. REXML takes care of all the encoding. + def to_html + REXML::Text.new(self) + end +end + + +class REXML::Element + # We only want to output the children in Maruku::to_html + public :write_children +end + +# This module groups all functions related to HTML export. +module MaRuKu; module Out; module HTML + include REXML + + # Render as an HTML fragment (no head, just the content of BODY). (returns a string) + def to_html(context={}) + indent = context[:indent] || -1 + ie_hack = context[:ie_hack] ||true + + div = Element.new 'dummy' + children_to_html.each do |e| + div << e + end + + # render footnotes + if @doc.footnotes_order.size > 0 + div << render_footnotes + end + + doc = Document.new(nil,{:respect_whitespace =>:all}) + doc << div + + # REXML Bug? if indent!=-1 whitespace is not respected for 'pre' elements + # containing code. 
+ xml ="" + div.write_children(xml,indent,transitive=true,ie_hack) + xml + end + + # Render to a complete HTML document (returns a string) + def to_html_document(context={}) + indent = context[:indent] || -1 + ie_hack = context[:ie_hack] ||true + doc = to_html_document_tree + xml = "" + + # REXML Bug? if indent!=-1 whitespace is not respected for 'pre' elements + # containing code. + doc.write(xml,indent,transitive=true,ie_hack); + + xhtml10strict = " +<?xml version='1.0' encoding='utf-8'?> +<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN' +'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>\n" + + xhtml11strict_mathml2 = '<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN" + "http://www.w3.org/TR/MathML2/dtd/xhtml-math11-f.dtd" [ + <!ENTITY mathml "http://www.w3.org/1998/Math/MathML"> +]> +' + +xhtml11_mathml2_svg11 = +'<?xml version="1.0" encoding="utf-8"?> +<!DOCTYPE html PUBLIC + "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" + "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd"> +' + + xhtml11_mathml2_svg11 + xml + end + + def xml_newline() Text.new("\n") end + + # Render to a complete HTML document (returns a REXML document tree) + def to_html_document_tree + doc = Document.new(nil,{:respect_whitespace =>:all}) + # doc << XMLDecl.new + + root = Element.new('html', doc) + root.add_namespace('http://www.w3.org/1999/xhtml') + root.add_namespace('svg', "http://www.w3.org/2000/svg" ) + lang = self.attributes[:lang] || 'en' + root.attributes['xml:lang'] = lang + + root << xml_newline + head = Element.new 'head', root + + #<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8"> + me = Element.new 'meta', head + me.attributes['http-equiv'] = 'Content-type' +# me.attributes['content'] = 'text/html;charset=utf-8' + me.attributes['content'] = 'application/xhtml+xml;charset=utf-8' + + # Create title element + doc_title = self.attributes[:title] || self.attributes[:subject] || 
"" + title = Element.new 'title', head + title << Text.new(doc_title) + + + + if css_list = self.attributes[:css] + css_list.split.each do |css| + # <link type="text/css" rel="stylesheet" href="..." /> + link = Element.new 'link' + link.attributes['type'] = 'text/css' + link.attributes['rel'] = 'stylesheet' + link.attributes['href'] = css + head << link + head << xml_newline + end + end + + root << xml_newline + + body = Element.new 'body' + + children_to_html.each do |e| + body << e + end + + # render footnotes + if @doc.footnotes_order.size > 0 + body << render_footnotes + end + + # When we are rendering a whole document, we add a signature + # at the bottom. + if get_setting(:maruku_signature) + body << maruku_html_signature + end + + root << body + + doc + end + + # returns "st","nd","rd" or "th" as appropriate + def day_suffix(day) + s = { + 1 => 'st', + 2 => 'nd', + 3 => 'rd', + 21 => 'st', + 22 => 'nd', + 23 => 'rd', + 31 => 'st' + } + return s[day] || 'th'; + end + + # formats a nice date + def nice_date + t = Time.now + t.strftime(" at %H:%M on ")+ + t.strftime("%A, %B %d")+ + day_suffix(t.day)+ + t.strftime(", %Y") + end + + def maruku_html_signature + div = Element.new 'div' + div.attributes['class'] = 'maruku_signature' + Element.new 'hr', div + span = Element.new 'span', div + span.attributes['style'] = 'font-size: small; font-style: italic' + span << Text.new('Created by ') + a = Element.new('a', span) + a.attributes['href'] = 'http://maruku.rubyforge.org' + a.attributes['title'] = 'Maruku: a Markdown-superset interpreter for Ruby' + a << Text.new('Maruku') + span << Text.new(nice_date+".") + div + end + + def render_footnotes + div = Element.new 'div' + div.attributes['class'] = 'footnotes' + div << Element.new('hr') + ol = Element.new 'ol' + @doc.footnotes_order.each_with_index do |fid, i| num = i+1 + f = self.footnotes[fid] + if f + li = f.wrap_as_element('li') + li.attributes['id'] = "fn:#{num}" + + a = Element.new 'a' + a.attributes['href'] = 
"#fnref:#{num}" + a.attributes['rev'] = 'footnote' + a<< Text.new('↩', true, nil, true) + li.insert_after(li.children.last, a) + ol << li + else + maruku_error"Could not find footnote '#{fid}'" + end + end + div << ol + div + end + + + def to_html_hrule; create_html_element 'hr' end + def to_html_linebreak; Element.new 'br' end + + # renders children as html and wraps into an element of given name + # + # Sets 'id' if meta is set + def wrap_as_element(name, attributes_to_copy=[]) + m = create_html_element(name, attributes_to_copy) + children_to_html.each do |e| m << e; end + +# m << Comment.new( "{"+self.al.to_md+"}") if not self.al.empty? +# m << Comment.new( @attributes.inspect) if not @attributes.empty? + m + end + + StandardAttributes = [:id, :style, :class] + def create_html_element(name, attributes_to_copy=[]) + m = Element.new name + (StandardAttributes+attributes_to_copy).each do |a| + if v = @attributes[a] then m.attributes[a.to_s] = v.to_s end + end + m + end + + + def to_html_ul + if @attributes[:toc] + # render toc + html_toc = @doc.toc.to_html + return html_toc + else + add_ws wrap_as_element('ul') + end + end + + + def to_html_paragraph; add_ws wrap_as_element('p') end + def to_html_ol; add_ws wrap_as_element('ol') end + def to_html_li; add_ws wrap_as_element('li') end + def to_html_li_span; add_ws wrap_as_element('li') end + def to_html_quote; add_ws wrap_as_element('blockquote') end + def to_html_strong; wrap_as_element('strong') end + def to_html_emphasis; wrap_as_element('em') end + + # nil if not applicable, else string + def section_number + return nil if not @doc.attributes[:use_numbered_headers] + + n = @attributes[:section_number] + if n && (not n.empty?) + n.join('.')+". 
" + else + nil + end + end + + # nil if not applicable, else SPAN element + def render_section_number + # if we are bound to a section, add section number + if num = section_number + span = Element.new 'span' + span.attributes['class'] = 'maruku_section_number' + span << Text.new(section_number) + span + else + nil + end + end + + def to_html_header + element_name = "h#{self.level}" + h = wrap_as_element element_name + + if span = render_section_number + h.insert_before(h.children.first, span) + end + add_ws h + end + + def source2html(source) + source = source.gsub(/&/,'&') + source = Text.normalize(source) + Text.new(source, true, nil, false ) + end + +=begin maruku_doc +Attribute: html_use_syntax +Scope: document +Output: html +Summary: Enables the use of the `syntax` package. +Related: lang, code_lang +Default: <?mrk Globals[:html_use_syntax].to_s ?> + +If false, Maruku does not append a signature to the +generated file. +=end + + def to_html_code; + source = self.raw_code + + lang = self.attributes[:lang] || @doc.attributes[:code_lang] + + lang = 'xml' if lang=='html' + + + use_syntax = get_setting :html_use_syntax + + element = + if use_syntax && lang + begin + convertor = Syntax::Convertors::HTML.for_syntax lang + + # eliminate trailing newlines otherwise Syntax crashes + source = source.gsub(/\n*\Z/,'') + + html = convertor.convert( source ) + + pre = Document.new(html, {:respect_whitespace =>:all}).root + pre.attributes['class'] = lang + pre + rescue Object => e + maruku_error"Error while using the syntax library for code:\n#{source.inspect}"+ + "Lang is #{lang} object is: \n"+ + self.inspect + + "\nException: #{e.class}: #{e.message}\n\t#{e.backtrace.join("\n\t")}" + + tell_user("Using normal PRE because the syntax library did not work.") + to_html_code_using_pre(source) + end + else + to_html_code_using_pre(source) + end + + color = get_setting(:code_background_color) + if color != Globals[:code_background_color] + element.attributes['style'] = 
"background-color: #{color};" + end + element + end + + def to_html_code_using_pre(source) + pre = create_html_element 'pre' + code = Element.new 'code', pre + s = source + + s = s.gsub(/&/,'&') + s = Text.normalize(s) + s = s.gsub(/\'/,''') # IE bug + s = s.gsub(/'/,''') # IE bug + + if get_setting(:code_show_spaces) + # 187 = raquo + # 160 = nbsp + # 172 = not + s.gsub!(/\t/,'»'+' '*3) + s.gsub!(/ /,'¬') + end + + text = Text.new(s, respect_ws=true, parent=nil, raw=true ) + + code << text + pre + end + + def to_html_inline_code; + pre = create_html_element 'code' + source = self.raw_code + pre << source2html(source) + + color = get_setting(:code_background_color) + if color != Globals[:code_background_color] + pre.attributes['style'] = "background-color: #{color};" + end + + pre + end + + def to_html_immediate_link + a = create_html_element 'a' + url = self.url + text = url.gsub(/^mailto:/,'') # don't show mailto + a << Text.new(text) + a.attributes['href'] = url + a + end + + def to_html_link + a = wrap_as_element 'a' + id = self.ref_id + # if empty, use text + if id.size == 0 + id = children.to_s.downcase + end + + if ref = @doc.refs[id] + url = ref[:url] + title = ref[:title] + a.attributes['href'] = url if url + a.attributes['title'] = title if title + else + maruku_error"Could not find ref_id = #{id.inspect} for #{self.inspect}" + tell_user "Not creating a link for ref_id = #{id.inspect}." + return wrap_as_element('span') + end + return a + end + + def to_html_im_link + if url = self.url + title = self.title + a = wrap_as_element 'a' + a.attributes['href'] = url + a.attributes['title'] = title if title + return a + else + maruku_error"Could not find url in #{self.inspect}" + tell_user "Not creating a link for ref_id = #{id.inspect}." 
# Renders an inline image (one that carries its own url rather than a
# reference id) as an `img` element.
#
# The element is built with create_html_element; `src` is set to the
# node's url and `alt` to the node's title (empty string when there is
# no title).
#
# When the node has no url, the problem is reported through
# maruku_error and tell_user, and the children are wrapped in a SPAN
# instead. The previous message interpolated `id`, which is not defined
# in this method (inline images have no ref_id), so the fallback path
# raised NameError instead of producing the SPAN.
def to_html_im_image
  image_url = url
  unless image_url
    maruku_error "Image with no url: #{inspect}"
    tell_user "Could not create image because no url was given;" \
              " Using SPAN element as replacement."
    return wrap_as_element('span')
  end

  img = create_html_element 'img'
  img.attributes['src'] = image_url
  img.attributes['alt'] = title.to_s
  img
end
+ elements = root.to_a + return elements + else # invalid + # Creates red box with offending HTML + tell_user "Wrapping bad html in a PRE with class 'markdown-html-error'\n"+ + add_tabs(raw_html,1,'|') + pre = Element.new('pre') + pre.attributes['style'] = 'border: solid 3px red; background-color: pink' + pre.attributes['class'] = 'markdown-html-error' + pre << Text.new("HTML parse error: \n#{raw_html}", true) + return pre + end + end + + def to_html_abbr + abbr = Element.new 'abbr' + abbr << Text.new(children[0]) + abbr.attributes['title'] = self.title if self.title + abbr + end + + def to_html_footnote_reference + id = self.footnote_id + + # save the order of used footnotes + order = @doc.footnotes_order + + # take next number + order << id + num = order.size; + + sup = Element.new 'sup' + sup.attributes['id'] = "fnref:#{num}" + a = Element.new 'a' + a << Text.new(num.to_s) + a.attributes['href'] = "\#fn:#{num}" + a.attributes['rel'] = 'footnote' + sup << a + + sup + end + +## Definition lists ### + def to_html_definition_list() add_ws wrap_as_element('dl') end + def to_html_definition() children_to_html end + def to_html_definition_term() add_ws wrap_as_element('dt') end + def to_html_definition_data() add_ws wrap_as_element('dd') end + + # FIXME: Ugly code + def to_html_table + align = self.align + num_columns = align.size + + head = @children.slice(0, num_columns) + rows = [] + i = num_columns + while i<@children.size + rows << @children.slice(i, num_columns) + i += num_columns + end + + table = create_html_element 'table' + thead = Element.new 'thead' + tr = Element.new 'tr' + array_to_html(head).each do |x| tr<<x end + thead << tr + table << thead + + tbody = Element.new 'tbody' + rows.each do |row| + tr = Element.new 'tr' + array_to_html(row).each_with_index do |x,i| + x.attributes['style'] ="text-align: #{align[i].to_s};" + tr<<x + end + + tbody << tr << Text.new("\n") + end + table << tbody + table + end + + def to_html_head_cell; wrap_as_element('th') 
# Renders an entity node as a text node holding an entity reference.
#
# The entity name is looked up in MaRuKu::Out::Latex::ENTITY_TABLE
# (populated on demand by need_entity_table); when the entry has an
# html_num, the numeric form is used instead of the name. `apos` is
# always emitted numerically as 39 (fix for Internet Explorer).
#
# Numeric entities are emitted as raw text (`&#NNN;`); anything else is
# emitted as `&name;` through a regular Text node.
def to_html_entity
  MaRuKu::Out::Latex.need_entity_table

  name_or_code = entity_name

  entry = MaRuKu::Out::Latex::ENTITY_TABLE[name_or_code]
  name_or_code = entry.html_num if entry && entry.html_num

  # Fix for Internet Explorer
  name_or_code = 39 if name_or_code == 'apos'

  # Integer rather than Fixnum: Fixnum is gone from current Ruby
  # versions, while Integer matches the same values on every version.
  if name_or_code.kind_of? Integer
    Text.new('&#%d;' % [name_or_code], false, nil, true)
  else
    Text.new('&%s;' % [name_or_code])
  end
end
# Registers a LaTeX package as needed by the document.
#
# Appends +p+ to latex_required_packages unless it is already listed,
# so each package appears at most once. Returns the package list when
# the package was added and nil when it was already present.
def latex_require_package(p)
  latex_required_packages.push(p) unless latex_required_packages.include?(p)
end
+ Latex_preamble_enc_cjk : Latex_preamble_enc_utf8 + +=begin maruku_doc +Attribute: latex_preamble +Scope: document +Output: latex +Summary: User-defined preamble. + +If the `latex_preamble` attribute is specified, then its value +will be used as a custom preamble. + +For example: + + Title: My document + Latex preamble: preamble.tex + +will produce: + + ... + \input{preamble.tex} + ... + +=end + user_preamble = (file = @doc.attributes[:latex_preamble]) ? + "\\input{#{file}}\n" : "" + +"\\documentclass{article} + +% Packages required to support encoding +#{encoding} + +% Packages required by code +#{required} + +% Packages always used +\\usepackage{hyperref} +\\usepackage{xspace} +\\usepackage[usenames,dvipsnames]{color} +\\usepackage[margin=1in]{geometry} +\\hypersetup{colorlinks=true,urlcolor=blue} + +#{user_preamble} + +\\begin{document} +#{body} +\\end{document} +" + end + + + def render_latex_signature +"\\vfill +\\hrule +\\vspace{1.2mm} +\\begin{tiny} +Created by \\href{http://maruku.rubyforge.org}{Maruku} #{self.nice_date}. 
# Builds a LaTeX color command from a color specification.
#
# s       - the color: `#aabbcc`, `#abc`, or anything else, which is
#           passed through verbatim as a color name.
# command - the LaTeX command to emit (default 'color').
#
# Produces one of:
#
#   \color[rgb]{1.00,0.20,0.30}   for the two hex forms
#   \color{name}                  otherwise
#
# Only real hexadecimal digits are accepted in the `#...` forms, and
# the whole string must match. The earlier `\w` pattern also accepted
# letters such as `z`, which String#hex turns into 0, so a mistyped
# color silently became black; such input is now passed through
# unchanged like any other name.
def latex_color(s, command='color')
  if (m = /\A\#([0-9a-fA-F]{2})([0-9a-fA-F]{2})([0-9a-fA-F]{2})\z/.match(s))
    # convert from 0-255 to 0.0-1.0
    scale = 255.0
  elsif (m = /\A\#([0-9a-fA-F])([0-9a-fA-F])([0-9a-fA-F])\z/.match(s))
    # convert from 0-15 to 0.0-1.0
    scale = 15.0
  else
    return "\\#{command}{#{s}}"
  end

  rgb = m.captures.map { |digits| digits.hex / scale }
  "\\#{command}[rgb]{%0.2f,%0.2f,%0.2f}" % rgb
end
# Maps a header level to the LaTeX sectioning command used for it.
TexHeaders = {
  1=>'section',
  2=>'subsection',
  3=>'subsubsection',
  4=>'paragraph'}

# Renders a header as an unnumbered (starred) LaTeX sectioning command.
#
# The command is taken from TexHeaders by level; levels not in the
# table fall back to 'paragraph'. When section_number returns a
# string, it is prepended to the title.
#
# When the header has an :id attribute, a \hypertarget and a \label
# with that id are emitted around the command. A leading '#' is
# dropped from the id first. The previous code assigned the Array
# [1, id.size] here instead of slicing the string, so such ids were
# rendered as "[1, N]".
def to_latex_header
  command = TexHeaders[level] || 'paragraph'

  title = children_to_latex
  number = section_number
  title = number + title if number

  id = attributes[:id]
  return "\\%s*{%s}\n\n" % [command, title] unless id

  # drop '#' at the beginning
  id = id[1, id.size] if id[0, 1] == '#'
  "\\hypertarget{%s}{}\\%s*{{%s}}\\label{%s}\n\n" % [id, command, title, id]
end
# Byte values that latex_escape handles without a \char escape:
# space (32), a-z (97..122) and A-Z (65..90).
#
# These are stored as Integers because String#each_byte yields
# Integers. The earlier definition used character literals (?a etc.),
# which are Strings on current Ruby versions, so no byte ever matched
# and every character, including letters and spaces, was escaped.
SAFE_CHARS = Set.new([32] + (97..122).to_a + (65..90).to_a)

# the ultimate escaping
# (is much better than using \verb)
#
# Converts +source+ byte by byte into printable LaTeX:
# a space becomes '~', ASCII letters are kept as they are, and every
# other byte is written as \charNNN with its decimal value.
# Returns the escaped string ('' for empty input).
def latex_escape(source)
  pieces = []
  source.each_byte do |b|
    pieces <<
      if b == 32
        '~'
      elsif SAFE_CHARS.include?(b)
        b.chr
      else
        "\\char%d" % b
      end
  end
  pieces.join
end
# Renders a footnote reference as an inline LaTeX \footnote.
#
# The footnote is looked up in @doc.footnotes by footnote_id and its
# LaTeX rendering (stripped of surrounding whitespace) becomes the
# \footnote argument, followed by one space.
#
# When the footnote cannot be found, a message is written to $stderr
# and an empty string is returned. The previous code interpolated the
# undefined name `fid` in that message (raising NameError) and would
# otherwise have returned nil, which array_to_latex rejects with a
# "Nil html" error.
def to_latex_footnote_reference
  id = footnote_id
  footnote = @doc.footnotes[id]

  unless footnote
    $stderr.puts "Could not find footnote '#{id}'"
    return ''
  end

  "\\footnote{#{footnote.children_to_latex.strip}} "
end
+ raise "Nil html for #{c.inspect} created with method #{method}" + end + + if h.kind_of?Array + e = e + h + else + e << h + end + end + + # puts a space after commands if needed + e.each_index do |i| + if e[i] =~ /\\\w+\s*$/ # command + if (s=e[i+1]) && s[0] == ?\ # space + e[i] = e[i] + "\\ " + end + end + end + + e.join(join_char) + end + +end end end # MaRuKu::Out::Latex diff --git a/lib/maruku/output/to_latex_entities.rb b/lib/maruku/output/to_latex_entities.rb new file mode 100644 index 00000000..2ada18b6 --- /dev/null +++ b/lib/maruku/output/to_latex_entities.rb @@ -0,0 +1,367 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + + +require 'rexml/document' + +module MaRuKu; module Out; module Latex + + include REXML + + def to_latex_entity + MaRuKu::Out::Latex.need_entity_table + + entity_name = self.entity_name + + entity = ENTITY_TABLE[entity_name] + if not entity + maruku_error "I don't know how to translate entity '#{entity_name}' "+ + "to LaTeX." 
+ return "" + end + replace = entity.latex_string + + entity.latex_packages.each do |p| + @doc.latex_require_package p + end + + if replace =~ /^\\/ + replace = replace + " " + end + + if replace + return replace + else + tell_user "Cannot translate entity #{entity_name.inspect} to LaTeX." + return entity_name + end + end + + class LatexEntity + safe_attr_accessor :html_num, Fixnum + safe_attr_accessor :html_entity, String + safe_attr_accessor :latex_string, String + safe_attr_accessor :latex_packages, Array + end + + def Latex.need_entity_table + Latex.init_entity_table if ENTITY_TABLE.empty? + end + + # create hash @@entity_to_latex + def Latex.init_entity_table + $stderr.write "Creating entity table.." + $stderr.flush + doc = Document.new XML_TABLE + doc.elements.each("//char") do |c| + num = c.attributes['num'].to_i + name = c.attributes['name'] + package = c.attributes['package'] + + convert = c.attributes['convertTo'] + convert.gsub!(/@DOUBLEQUOT/,'"') + convert.gsub!(/@QUOT/,"'") + convert.gsub!(/@GT/,">") + convert.gsub!(/@LT/,"<") + convert.gsub!(/@AMP/,"&") + convert.freeze + + e = LatexEntity.new + e.html_num = num + e.html_entity = name + e.latex_string = convert + e.latex_packages = package ? package.split : [] + + ENTITY_TABLE[num] = e + ENTITY_TABLE[name] = e + end + $stderr.puts "..done." 
+ end + + ENTITY_TABLE = {} + +# The following is a conversion chart for html elements, courtesy of +# text2html + + XML_TABLE =" + <chars> + <char num='913' name='Alpha' convertTo='$A$' /> + <char num='914' name='Beta' convertTo='$B$' /> + <char num='915' name='Gamma' convertTo='$\\Gamma$' /> + <char num='916' name='Delta' convertTo='$\\Delta$' /> + <char num='917' name='Epsilon' convertTo='$E$' /> + <char num='918' name='Zeta' convertTo='$Z$' /> + <char num='919' name='Eta' convertTo='$H$' /> + <char num='920' name='Theta' convertTo='$\\Theta$' /> + <char num='921' name='Iota' convertTo='$I$' /> + <char num='922' name='Kappa' convertTo='$K$' /> + <char num='923' name='Lambda' convertTo='$\\Lambda$' /> + <char num='924' name='Mu' convertTo='$M$' /> + <char num='925' name='Nu' convertTo='$N$' /> + <char num='926' name='Xi' convertTo='$\\Xi$' /> + <char num='927' name='Omicron' convertTo='$O$' /> + <char num='928' name='Pi' convertTo='$\\Pi$' /> + <char num='929' name='Rho' convertTo='$P$' /> + <char num='931' name='Sigma' convertTo='$\\Sigma$' /> + <char num='932' name='Tau' convertTo='$T$' /> + <char num='933' name='Upsilon' convertTo='$Y$' /> + <char num='934' name='Phi' convertTo='$\\Phi$' /> + <char num='935' name='Chi' convertTo='$X$' /> + <char num='936' name='Psi' convertTo='$\\Psi$' /> + <char num='937' name='Omega' convertTo='$\\Omega$' /> + <char num='945' name='alpha' convertTo='$\\alpha$' /> + <char num='946' name='beta' convertTo='$\\beta$' /> + <char num='947' name='gamma' convertTo='$\\gamma$' /> + <char num='948' name='delta' convertTo='$\\delta$' /> + <char num='949' name='epsilon' convertTo='$\\epsilon$' /> + <char num='950' name='zeta' convertTo='$\\zeta$' /> + <char num='951' name='eta' convertTo='$\\eta$' /> + <char num='952' name='theta' convertTo='$\\theta$' /> + <char num='953' name='iota' convertTo='$\\iota$' /> + <char num='954' name='kappa' convertTo='$\\kappa$' /> + <char num='955' name='lambda' convertTo='$\\lambda$' /> + <char 
num='956' name='mu' convertTo='$\\mu$' /> + + <char num='957' name='nu' convertTo='$\\nu$' /> + <char num='958' name='xi' convertTo='$\\xi$' /> + <char num='959' name='omicron' convertTo='$o$' /> + <char num='960' name='pi' convertTo='$\\pi$' /> + <char num='961' name='rho' convertTo='$\\rho$' /> + <char num='963' name='sigma' convertTo='$\\sigma$' /> + <char num='964' name='tau' convertTo='$\\tau$' /> + <char num='965' name='upsilon' convertTo='$\\upsilon$' /> + <char num='966' name='phi' convertTo='$\\phi$' /> + + <char num='967' name='chi' convertTo='$\\chi$' /> + <char num='968' name='psi' convertTo='$\\psi$' /> + <char num='969' name='omega' convertTo='$\\omega$' /> + <char num='962' name='sigmaf' convertTo='$\\varsigma$' /> + <char num='977' name='thetasym' convertTo='$\\vartheta$' /> + <char num='982' name='piv' convertTo='$\\varpi$' /> + <char num='8230' name='hellip' convertTo='\\ldots' /> + <char num='8242' name='prime' convertTo='$\\prime$' /> + <char num='8254' name='oline' convertTo='-' /> + + <char num='8260' name='frasl' convertTo='/' /> + <char num='8472' name='weierp' convertTo='$\\wp$' /> + <char num='8465' name='image' convertTo='$\\Im$' /> + <char num='8476' name='real' convertTo='$\\Re$' /> + <char num='8501' name='alefsym' convertTo='$\\aleph$' /> + <char num='8226' name='bull' convertTo='$\\bullet$' /> + <char num='8482' name='trade' convertTo='$^{\\rm TM}$' /> <!-- \texttrademark --> + <char num='8592' name='larr' convertTo='$\\leftarrow$' /> + + <char num='8594' name='rarr' convertTo='$\\rightarrow$' /> + <char num='8593' name='uarr' convertTo='$\\uparrow$' /> + <char num='8595' name='darr' convertTo='$\\downarrow$' /> + <char num='8596' name='harr' convertTo='$\\leftrightarrow$' /> + <char num='8629' name='crarr' convertTo='$\\hookleftarrow$' /> + <char num='8657' name='uArr' convertTo='$\\Uparrow$' /> + <char num='8659' name='dArr' convertTo='$\\Downarrow$' /> + <char num='8656' name='lArr' convertTo='$\\Leftarrow$' /> + <char num='8658' 
name='rArr' convertTo='$\\Rightarrow$' /> + + <char num='8660' name='hArr' convertTo='$\\Leftrightarrow$' /> + <char num='8704' name='forall' convertTo='$\\forall$' /> + <char num='8706' name='part' convertTo='$\\partial$' /> + <char num='8707' name='exist' convertTo='$\\exists$' /> + <char num='8709' name='empty' convertTo='$\\emptyset$' /> + <char num='8711' name='nabla' convertTo='$\\nabla$' /> + <char num='8712' name='isin' convertTo='$\\in$' /> + <char num='8715' name='ni' convertTo='$\\ni$' /> + <char num='8713' name='notin' convertTo='$\\notin$' /> + + <char num='8721' name='sum' convertTo='$\\sum$' /> + <char num='8719' name='prod' convertTo='$\\prod$' /> + <char num='8722' name='minus' convertTo='$-$' /> + <char num='8727' name='lowast' convertTo='$\\ast$' /> + <char num='8730' name='radic' convertTo='$\\surd$' /> + <char num='8733' name='prop' convertTo='$\\propto$' /> + <char num='8734' name='infin' convertTo='$\\infty$' /> + <char num='8736' name='ang' convertTo='$\\angle$' /> + <char num='8743' name='and' convertTo='$\\wedge$' /> + + <char num='8744' name='or' convertTo='$\\vee$' /> + <char num='8745' name='cup' convertTo='$\\cup$' /> + <char num='8746' name='cap' convertTo='$\\cap$' /> + <char num='8747' name='int' convertTo='$\\int$' /> + <char num='8756' name='there4' convertTo='$\\therefore$' package='amssymb' /> <!-- only AMS --> + <char num='8764' name='sim' convertTo='$\\sim$' /> + <char num='8776' name='asymp' convertTo='$\\approx$' /> + <char num='8773' name='cong' convertTo='$\\cong$' /> + + <char num='8800' name='ne' convertTo='$\\neq$' /> + <char num='8801' name='equiv' convertTo='$\\equiv$' /> + <char num='8804' name='le' convertTo='$\\leq$' /> + <char num='8805' name='ge' convertTo='$\\geq$' /> + <char num='8834' name='sub' convertTo='$\\subset$' /> + <char num='8835' name='sup' convertTo='$\\supset$' /> +<!-- <char num='8838' name='sube' convertTo='$\\subseteq$' />--> + <char num='8839' name='supe' convertTo='$\\supseteq$' /> +<!-- <char 
num='8836' name='nsub' convertTo='$\\nsubset$' /> <!-- only AMS --> + + <char num='8853' name='oplus' convertTo='$\\oplus$' /> + <char num='8855' name='otimes' convertTo='$\\otimes$' /> + <char num='8869' name='perp' convertTo='$\\perp$' /> + <char num='8901' name='sdot' convertTo='$\\cdot$' /> + <char num='8968' name='rceil' convertTo='$\\rceil$' /> + <char num='8969' name='lceil' convertTo='$\\lceil$' /> + <char num='8970' name='lfloor' convertTo='$\\lfloor$' /> + <char num='8971' name='rfloor' convertTo='$\\rfloor$' /> + <char num='9001' name='rang' convertTo='$\\rangle$' /> + + <char num='9002' name='lang' convertTo='$\\langle$' /> + <char num='9674' name='loz' convertTo='$\\lozenge$' package='amssymb' /> <!-- only AMS --> + <char num='9824' name='spades' convertTo='$\\spadesuit$' /> + <char num='9827' name='clubs' convertTo='$\\clubsuit$' /> + <char num='9829' name='hearts' convertTo='$\\heartsuit$' /> + <char num='9830' name='diams' convertTo='$\\diamondsuit$' /> + <char num='38' name='amp' convertTo='\\@AMP' /> +<!-- <char num='34' name='quot' convertTo='\\@DOUBLEQUOT' /> XXX --> + <char num='34' name='quot' convertTo='\"' /> + <char num='39' name='apos' convertTo=\"'\" /> + <char num='169' name='copy' convertTo='\\copyright' /> + + <char num='60' name='lt' convertTo='$@LT$' /> + <char num='62' name='gt' convertTo='$@GT$' /> + <char num='338' name='OElig' convertTo='\\OE' /> + <char num='339' name='oelig' convertTo='\\oe' /> + <char num='352' name='Scaron' convertTo='\\v{S}' /> + <char num='353' name='scaron' convertTo='\\v{s}' /> + <char num='376' name='Yuml' convertTo='\\\"Y' /> + <char num='710' name='circ' convertTo='\\textasciicircum' /> + <char num='732' name='tilde' convertTo='\\textasciitilde' /> + + <char num='8211' name='ndash' convertTo='--' /> + <char num='8212' name='mdash' convertTo='---' /> + <char num='8216' name='lsquo' convertTo='`' /> + <char num='8217' name='rsquo' convertTo=\"'\" /> <!-- XXXX --> + <char num='8220' name='ldquo' 
convertTo='``' /> + <char num='8221' name='rdquo' convertTo=\"''\" /> <!-- XXXX --> + <char num='8224' name='dagger' convertTo='\\dag' /> + <char num='8225' name='Dagger' convertTo='\\ddag' /> + <char num='8240' name='permil' convertTo='\\permil' package='wasysym' /> <!-- wasysym package --> + + <char num='8364' name='euro' convertTo='\\euro' package='eurosym' /> <!-- eurosym package --> + <char num='8249' name='lsaquo' convertTo='\\guilsinglleft' package='aeguill'/> + <char num='8250' name='rsaquo' convertTo='\\guilsinglright' package='aeguill' /> +<!-- <char num='160' name='nbsp' convertTo='\\nolinebreak' />--> + <char num='160' name='nbsp' convertTo='~' /> + <char num='161' name='iexcl' convertTo='\\textexclamdown' /> + <char num='163' name='pound' convertTo='\\pounds' /> + <char num='164' name='curren' convertTo='\\currency' package='wasysym' /> <!-- wasysym package --> + <char num='165' name='yen' convertTo='\\textyen' package='textcomp'/> <!-- textcomp --> + + <char num='166' name='brvbar' convertTo='\\brokenvert' /> <!-- wasysym --> + <char num='167' name='sect' convertTo='\\S' /> + <char num='171' name='laquo' convertTo='\\guillemotleft' package='aeguill'/> + <char num='187' name='raquo' convertTo='\\guillemotright' package='aeguill'/> + <char num='174' name='reg' convertTo='\\textregistered' /> + <char num='170' name='ordf' convertTo='\\textordfeminine' /> + <char num='172' name='not' convertTo='$\\neg$' /> + <!-- <char num='176' name='deg' convertTo='$\\degree$' /> <!-- mathabx --> + <char num='176' name='deg' convertTo='\\textdegree' package='textcomp'/> + + <char num='177' name='plusmn' convertTo='$\\pm$' /> + <char num='180' name='acute' convertTo='@QUOT' /> + <char num='181' name='micro' convertTo='$\\mu$' /> + <char num='182' name='para' convertTo='\\P' /> + <char num='183' name='middot' convertTo='$\\cdot$' /> + <char num='186' name='ordm' convertTo='\\textordmasculine' /> + <char num='162' name='cent' convertTo='\\cent' package='wasysym' /> + <char 
num='185' name='sup1' convertTo='$^1$' /> + + <char num='178' name='sup2' convertTo='$^2$' /> + <char num='179' name='sup3' convertTo='$^3$' /> + <char num='189' name='frac12' convertTo='$\\frac{1}{2}$' /> + <char num='188' name='frac14' convertTo='$\\frac{1}{4}$' /> + <char num='190' name='frac34' convertTo='$\\frac{3}{4}$' /> + <char num='192' name='Agrave' convertTo='\\`A' /> + <char num='193' name='Aacute' convertTo='\\@QUOTA' /> + <char num='194' name='Acirc' convertTo='\\^A' /> + <char num='195' name='Atilde' convertTo='\\~A' /> + + <char num='196' name='Auml' convertTo='\\@DOUBLEQUOTA' /> + <char num='197' name='Aring' convertTo='\\AA' /> + <char num='198' name='AElig' convertTo='\\AE' /> + <char num='199' name='Ccedil' convertTo='\\c{C}' /> + <char num='200' name='Egrave' convertTo='\\`E' /> + <char num='201' name='Eacute' convertTo='\\@QUOTE' /> + <char num='202' name='Ecirc' convertTo='\\^E' /> + <char num='203' name='Euml' convertTo='\\@DOUBLEQUOTE' /> + <char num='204' name='Igrave' convertTo='\\`I' /> + <char num='205' name='Iacute' convertTo='\\@QUOTI' /> + <char num='206' name='Icirc' convertTo='\\^I' /> + <char num='207' name='Iuml' convertTo='\\\"I' /> + <char num='208' name='ETH' convertTo='$\\eth$' /> <!-- AMS --> + <char num='209' name='Ntilde' convertTo='\\~N' /> + <char num='210' name='Ograve' convertTo='\\`O' /> + <char num='211' name='Oacute' convertTo='\\@QUOT O' /> + <char num='212' name='Ocirc' convertTo='\\^O' /> + <char num='213' name='Otilde' convertTo='\\~O' /> + <char num='214' name='Ouml' convertTo='\\@DOUBLEQUOTO' /> + <char num='215' name='times' convertTo='$\\times$' /> + <char num='216' name='Oslash' convertTo='\\O' /> + <char num='217' name='Ugrave' convertTo='\\`U' /> + <char num='218' name='Uacute' convertTo='\\@QUOTU' /> + <char num='219' name='Ucirc' convertTo='\\^U' /> + <char num='220' name='Uuml' convertTo='\\@DOUBLEQUOTU' /> + <char num='221' name='Yacute' convertTo='\\@QUOTY' /> + <char num='223' name='szlig' 
convertTo='\\ss' /> + <char num='224' name='agrave' convertTo='\\`a' /> + <char num='225' name='aacute' convertTo='\\@QUOTa' /> + <char num='226' name='acirc' convertTo='\\^a' /> + <char num='227' name='atilde' convertTo='\\~a' /> + <char num='228' name='auml' convertTo='\\@DOUBLEQUOTa' /> + <char num='229' name='aring' convertTo='\\aa' /> + <char num='230' name='aelig' convertTo='\\ae' /> + <char num='231' name='ccedil' convertTo='\\c{c}' /> + <char num='232' name='egrave' convertTo='\\`e' /> + <char num='233' name='eacute' convertTo='\\@QUOTe' /> + <char num='234' name='ecirc' convertTo='\\^e' /> + <char num='235' name='euml' convertTo='\\@DOUBLEQUOTe' /> + <char num='236' name='igrave' convertTo='\\`i' /> + <char num='237' name='iacute' convertTo='\\@QUOTi' /> + <char num='238' name='icirc' convertTo='\\^i' /> + <char num='239' name='iuml' convertTo='\\@DOUBLEQUOTi' /> + <char num='240' name='eth' convertTo='$\\eth$' package='amssymb'/> <!-- --> + <char num='241' name='ntilde' convertTo='\\~n' /> + <char num='242' name='ograve' convertTo='\\`o' /> + <char num='243' name='oacute' convertTo='\\@QUOTo' /> + <char num='244' name='ocirc' convertTo='\\^o' /> + <char num='245' name='otilde' convertTo='\\~o' /> + <char num='246' name='ouml' convertTo='\\@DOUBLEQUOTo' /> +<!-- <char num='247' name='divide' convertTo='$\\divide$' /> --> + <char num='248' name='oslash' convertTo='\\o' /> + <char num='249' name='ugrave' convertTo='\\`u' /> + <char num='250' name='uacute' convertTo='\\@QUOTu' /> + <char num='251' name='ucirc' convertTo='\\^u' /> + <char num='252' name='uuml' convertTo='\\@DOUBLEQUOTu' /> + <char num='253' name='yacute' convertTo='\\@QUOTy' /> + + <char num='255' name='yuml' convertTo='\\@DOUBLEQUOTy' /> + + <char num='222' name='THORN' convertTo='\\Thorn' package='wasysym' /> + <char num='254' name='thorn' convertTo='\\thorn' package='wasysym' /> + </chars>" + + +end end end + diff --git a/lib/maruku/output/to_latex_strings.rb 
b/lib/maruku/output/to_latex_strings.rb new file mode 100644 index 00000000..da043a79 --- /dev/null +++ b/lib/maruku/output/to_latex_strings.rb @@ -0,0 +1,64 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + + + +class String + + # These are TeX's special characters + LATEX_ADD_SLASH = [ ?{, ?}, ?$, ?&, ?#, ?_, ?%] + + # These, we transform to {\tt \char<ascii code>} + LATEX_TO_CHARCODE = [ ?^, ?~, ?>,?<] + + def escape_to_latex(s) + s2 = "" + s.each_byte do |b| + if LATEX_TO_CHARCODE.include? b + s2 += "{\\tt \\char#{b}}" + elsif LATEX_ADD_SLASH.include? 
b + s2 << ?\\ << b + elsif b == ?\\ + # there is no backslash in cmr10 fonts + s2 += "$\\backslash$" + else + s2 << b + end + end + s2 + end + + # escapes special characters + def to_latex + s = escape_to_latex(self) + OtherGoodies.each do |k, v| + s.gsub!(k, v) + end + s + end + + # other things that are good on the eyes + OtherGoodies = { + /(\s)LaTeX/ => '\1\\LaTeX\\xspace ', # XXX not if already \LaTeX +# 'HTML' => '\\textsc{html}\\xspace ', +# 'PDF' => '\\textsc{pdf}\\xspace ' + } + +end \ No newline at end of file diff --git a/lib/maruku/output/to_markdown.rb b/lib/maruku/output/to_markdown.rb new file mode 100644 index 00000000..98d9322d --- /dev/null +++ b/lib/maruku/output/to_markdown.rb @@ -0,0 +1,164 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + + +class String + # XXX: markdown escaping + def to_md(c=nil) + to_s + end + + # " andrea censi " => [" andrea ", "censi "] + def mysplit + split.map{|x| x+" "} + end +end + + +module MaRuKu; module Out; module Markdown + + DefaultLineLength = 40 + + def to_md(context={}) + children_to_md(context) + end + + def to_md_paragraph(context) + line_length = context[:line_length] || DefaultLineLength + wrap(@children, line_length, context)+"\n" + end + + def to_md_li_span(context) + len = (context[:line_length] || DefaultLineLength) - 2 + s = add_tabs(wrap(@children, len-2, context), 1, ' ') + s[0] = ?* + s + "\n" + end + + def to_md_abbr_def(context) + "*[#{self.abbr}]: #{self.text}\n" + end + + def to_md_ol(context) + len = (context[:line_length] || DefaultLineLength) - 2 + md = "" + self.children.each_with_index do |li, i| + s = add_tabs(w=wrap(li.children, len-2, context), 1, ' ')+"\n" + s[0,4] = "#{i+1}. "[0,4] +# puts w.inspect + md += s + end + md + "\n" + end + + def to_md_ul(context) + len = (context[:line_length] || DefaultLineLength) - 2 + md = "" + self.children.each_with_index do |li, i| + w = wrap(li.children, len-2, context) +# puts "W: "+ w.inspect + s = add_indent(w) +# puts "S: " +s.inspect + s[0,1] = "-" + md += s + end + md + "\n" + end + + def add_indent(s,char=" ") + t = s.split("\n").map{|x| char+x }.join("\n") + s << ?\n if t[-1] == ?\n + s + end + + # Convert each child to html + def children_to_md(context) + array_to_md(@children, context) + end + + def wrap(array, line_length, context) + out = "" + line = "" + array.each do |c| + if c.kind_of?(MDElement) && c.node_type == :linebreak + out << line.strip << " \n"; line=""; + next + end + + pieces = + if c.kind_of? 
String + c.to_md.mysplit + else + [c.to_md(context)].flatten + end + + # puts "Pieces: #{pieces.inspect}" + pieces.each do |p| + if p.size + line.size > line_length + out << line.strip << "\n"; + line = "" + end + line << p + end + end + out << line.strip << "\n" if line.size > 0 + out << ?\n if not out[-1] == ?\n + out + end + + + def array_to_md(array, context, join_char='') + e = [] + array.each do |c| + method = c.kind_of?(MDElement) ? + "to_md_#{c.node_type}" : "to_md" + + if not c.respond_to?(method) + #raise "Object does not answer to #{method}: #{c.class} #{c.inspect[0,100]}" +# tell_user "Using default for #{c.node_type}" + method = 'to_md' + end + +# puts "#{c.inspect} created with method #{method}" + h = c.send(method, context) + + if h.nil? + raise "Nil md for #{c.inspect} created with method #{method}" + end + + if h.kind_of?Array + e = e + h + else + e << h + end + end + e.join(join_char) + end + +end end end + +module MaRuKu; class MDDocument + alias old_md to_md + def to_md(context={}) + s = old_md(context) +# puts s + s + end +end end \ No newline at end of file diff --git a/lib/maruku/output/to_s.rb b/lib/maruku/output/to_s.rb new file mode 100644 index 00000000..577660e0 --- /dev/null +++ b/lib/maruku/output/to_s.rb @@ -0,0 +1,53 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + + +module MaRuKu + +class MDElement + + # Strips all formatting from the string + def to_s + children_to_s + end + + def children_to_s + @children.join + end + + # Generate an id for headers. Assumes @children is set. + def generate_id + title = children_to_s + title.gsub!(/ /,'_') + title.downcase! + title.gsub!(/[^\w_]/,'') + title.strip! + + if title.size == 0 + $uid ||= 0 + $uid += 1 + title = "id#{$uid}" + end + + title + end +end + +end \ No newline at end of file diff --git a/lib/maruku/string_utils.rb b/lib/maruku/string_utils.rb new file mode 100644 index 00000000..80ffbb12 --- /dev/null +++ b/lib/maruku/string_utils.rb @@ -0,0 +1,184 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + + +# Boring stuff with strings. +module MaRuKu; module Strings + + def add_tabs(s,n=1,char="\t") + s.split("\n").map{|x| char*n+x }.join("\n") + end + + TabSize = 4; + + def split_lines(s) + s.split("\n") + end + + # This parses email headers. Returns an hash. + # + # +hash['data']+ is the message. 
+ # + # Keys are downcased, space becomes underscore, converted to symbols. + # + # My key: true + # + # becomes: + # + # {:my_key => true} + # + def parse_email_headers(s) + keys={} + match = (s =~ /((\w[\w\s]+: .*\n)+)\n/) + if match != 0 + keys[:data] = s + else + keys[:data] = $' + headers = $1 + headers.split("\n").each do |l| + k, v = l.split(':') + k, v = normalize_key_and_value(k, v) + k = k.to_sym +# puts "K = #{k}, V=#{v}" + keys[k] = v + end + end + keys + end + + # Keys are downcased, space becomes underscore, converted to symbols. + def normalize_key_and_value(k,v) + v = v ? v.strip : true # no value defaults to true + k = k.strip + + # check synonyms + v = true if ['yes','true'].include?(v.to_s.downcase) + v = false if ['no','false'].include?(v.to_s.downcase) + + k = k.downcase.gsub(' ','_') + return k, v + end + + # Returns the number of leading spaces, considering that + # a tab counts as `TabSize` spaces. + def number_of_leading_spaces(s) + n=0; i=0; + while i < s.size + c = s[i,1] + if c == ' ' + i+=1; n+=1; + elsif c == "\t" + i+=1; n+=TabSize; + else + break + end + end + n + end + + # This returns the position of the first real char in a list item + # + # For example: + # '*Hello' # => 1 + # '* Hello' # => 2 + # ' * Hello' # => 3 + # ' * Hello' # => 5 + # '1.Hello' # => 2 + # ' 1. 
Hello' # => 5 + + def spaces_before_first_char(s) + case s.md_type + when :ulist + i=0; + # skip whitespace if present + while s[i,1] =~ /\s/; i+=1 end + # skip indicator (+, -, *) + i+=1 + # skip optional whitespace + while s[i,1] =~ /\s/; i+=1 end + return i + when :olist + i=0; + # skip whitespace + while s[i,1] =~ /\s/; i+=1 end + # skip digits + while s[i,1] =~ /\d/; i+=1 end + # skip dot + i+=1 + # skip whitespace + while s[i,1] =~ /\s/; i+=1 end + return i + else + tell_user "BUG (my bad): '#{s}' is not a list" + 0 + end + end + + # Counts the number of leading '#' in the string + def num_leading_hashes(s) + i=0; + while i<(s.size-1) && (s[i,1]=='#'); i+=1 end + i + end + + # Strips initial and final hashes + def strip_hashes(s) + s = s[num_leading_hashes(s), s.size] + i = s.size-1 + while i > 0 && (s[i,1] =~ /(#|\s)/); i-=1; end + s[0, i+1].strip + end + + + # removes initial quote + def unquote(s) + s.gsub(/^>\s?/,'') + end + + # toglie al massimo n caratteri + def strip_indent(s, n) + i = 0 + while i < s.size && n>0 + c = s[i,1] + if c == ' ' + n-=1; + elsif c == "\t" + n-=TabSize; + else + break + end + i+=1 + end + s[i, s.size-1] + end + + def dbg_describe_ary(a, prefix='') + i = 0 + a.each do |l| + puts "#{prefix} (#{i+=1})# #{l.inspect}" + end + end + + def force_linebreak?(l) + l =~ / $/ + end + +end +end diff --git a/lib/maruku/structures.rb b/lib/maruku/structures.rb new file mode 100644 index 00000000..937e4a5b --- /dev/null +++ b/lib/maruku/structures.rb @@ -0,0 +1,165 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + + + +class Module + def safe_attr_accessor1(symbol, klass) + attr_reader symbol + code = <<-EOF + def #{symbol}=(val) + if not val.kind_of? #{klass} + s = "\nCould not assign an object of type \#{val.class} to #{symbol}.\n\n" + s += "Tried to assign object of class \#{val.class}:\n"+ + "\#{val.inspect}\n"+ + "to \#{self.class}::#{symbol} constrained to be of class #{klass}.\n" + raise s + end + @#{symbol} = val + end + +EOF + module_eval code + end + + def safe_attr_accessor2(symbol, klass) + attr_accessor symbol + end + + alias safe_attr_accessor safe_attr_accessor2 +end + +module MaRuKu + +# I did not want to have a class for each possible element. +# Instead I opted to have only the class "MDElement" +# that represents eveything in the document (paragraphs, headers, etc). +# +# You can tell what it is by the variable `node_type`. +# +# In the instance-variable `children` there are the children. These +# can be of class 1) String or 2) MDElement. +# +# The @doc variable points to the document to which the MDElement +# belongs (which is an instance of Maruku, subclass of MDElement). +# +# Attributes are contained in the hash `attributes`. +# Keys are symbols (downcased, with spaces substituted by underscores) +# +# For example, if you write in the source document. +# +# Title: test document +# My property: value +# +# content content +# +# You can access `value` by writing: +# +# @doc.attributes[:my_property] # => 'value' +# +# from whichever MDElement in the hierarchy. 
+# +class MDElement + # See helpers.rb for the list of allowed #node_type values + safe_attr_accessor :node_type, Symbol + + # Children are either Strings or MDElement + safe_attr_accessor :children, Array + + # An attribute list, may not be nil + safe_attr_accessor :al, Array #Maruku::AttributeList + + # These are the processed attributes + safe_attr_accessor :attributes, Hash + + # Reference of the document (which is of class Maruku) + attr_accessor :doc + + def initialize(node_type=:unset, children=[], meta={}, + al=MaRuKu::AttributeList.new ) + super(); + self.children = children + self.node_type = node_type + + @attributes = {} + + meta.each do |symbol, value| + self.instance_eval " + def #{symbol}; @#{symbol}; end + def #{symbol}=(val); @#{symbol}=val; end" + self.send "#{symbol}=", value + end + + self.al = al || AttributeList.new + + self.meta_priv = meta + end + + attr_accessor :meta_priv + + def ==(o) + ok = o.kind_of?(MDElement) && + (self.node_type == o.node_type) && + (self.meta_priv == o.meta_priv) && + (self.children == o.children) + + if not ok +# puts "This:\n"+self.inspect+"\nis different from\n"+o.inspect+"\n\n" + end + ok + end +end + +# This represents the whole document and holds global data. + +class MDDocument + + safe_attr_accessor :refs, Hash + safe_attr_accessor :footnotes, Hash + + # This is an hash. The key might be nil. + safe_attr_accessor :abbreviations, Hash + + # Attribute lists definition + safe_attr_accessor :ald, Hash + + # The order in which footnotes are used. Contains the id. 
+ safe_attr_accessor :footnotes_order, Array + + safe_attr_accessor :latex_required_packages, Array + + def initialize(s=nil) + super(:document) + @doc = self + + self.refs = {} + self.footnotes = {} + self.footnotes_order = [] + self.abbreviations = {} + self.ald = {} + self.latex_required_packages = [] + + parse_doc(s) if s + end +end + + +end # MaRuKu + diff --git a/lib/maruku/structures_inspect.rb b/lib/maruku/structures_inspect.rb new file mode 100644 index 00000000..c0064f90 --- /dev/null +++ b/lib/maruku/structures_inspect.rb @@ -0,0 +1,87 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + + + +class String + def inspect_more(a=nil,b=nil) + inspect + end +end + +class Object + def inspect_more(a=nil,b=nil) + inspect + end +end + +class Array + def inspect_more(compact, join_string, add_brackets=true) + s = map {|x| + x.kind_of?(String) ? x.inspect : + x.kind_of?(MaRuKu::MDElement) ? x.inspect(compact) : + (raise "WTF #{x.class} #{x.inspect}") + }.join(join_string) + + add_brackets ? "[#{s}]" : s + end +end + +class Hash + def inspect_ordered(a=nil,b=nil) + "{"+keys.map{|x|x.to_s}.sort.map{|x|x.to_sym}. 
+ map{|k| k.inspect + "=>"+self[k].inspect}.join(',')+"}" + end +end + +module MaRuKu +class MDElement + def inspect(compact=true) + if compact + i2 = inspect2 + return i2 if i2 + end + + "md_el(:%s,%s,%s,%s)" % + [ + self.node_type, + children_inspect(compact), + @meta_priv.inspect_ordered, + self.al.inspect + ] + end + + def children_inspect(compact=true) + s = @children.inspect_more(compact,', ') + if @children.empty? + "[]" + elsif s.size < 70 + s + else + "[\n"+ + add_tabs(@children.inspect_more(compact,",\n",false))+ + "\n]" + end + end + +end + +end + diff --git a/lib/maruku/structures_iterators.rb b/lib/maruku/structures_iterators.rb new file mode 100644 index 00000000..ff9c6b43 --- /dev/null +++ b/lib/maruku/structures_iterators.rb @@ -0,0 +1,61 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + + +module MaRuKu + +class MDElement + + # Yields to each element of specified node_type + # All elements if e_node_type is nil. + def each_element(e_node_type=nil, &block) + @children.each do |c| + if c.kind_of? MDElement + if (not e_node_type) || (e_node_type == c.node_type) + block.call c + end + c.each_element(e_node_type, &block) + end + end + end + + # Apply passed block to each String in the hierarchy. 
+ def replace_each_string(&block) + for c in @children + if c.kind_of? MDElement + c.replace_each_string(&block) + end + end + + processed = [] + until @children.empty? + c = @children.shift + if c.kind_of? String + result = block.call(c) + [*result].each do |e| processed << e end + else + processed << c + end + end + @children = processed + end + +end +end \ No newline at end of file diff --git a/lib/maruku/tests/benchmark.rb b/lib/maruku/tests/benchmark.rb new file mode 100644 index 00000000..9854e10b --- /dev/null +++ b/lib/maruku/tests/benchmark.rb @@ -0,0 +1,82 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + + +require 'maruku' +require 'bluecloth' + + +data = $stdin.read + +num = 10 + +if ARGV.size > 0 && ((n=ARGV[0].to_i) != 0) + num = n +end + +methods = +[ + + [Maruku, :to_html], + [BlueCloth, :to_html], + [Maruku, :to_latex] + +] + +#methods = [[Maruku, :class]] +#num = 10 + +stats = +methods .map do |c, method| + puts "Computing for #{c}" + + start = Time.now + doc = nil + for i in 1..num + $stdout.write "#{i} "; $stdout.flush + doc = c.new(data) + end + stop = Time.now + parsing = (stop-start)/num + + start = Time.now + for i in 1..num + $stdout.write "#{i} "; $stdout.flush + s = doc.send method + end + stop = Time.now + rendering = (stop-start)/num + + puts ("%s (%s): parsing %0.2f sec + rendering %0.2f sec "+ + "= %0.2f sec ") % [c, method, parsing,rendering,parsing+rendering] + + [c, method, parsing, rendering] +end + +puts "\n\n\n" +stats.each do |x| x.push(x[2]+x[3]) end +max = stats.map{|x|x[4]}.max +stats.sort! { |x,y| x[4] <=> y[4] } . reverse! +for c, method, parsing, rendering, tot in stats + puts ("%20s: parsing %0.2f sec + rendering %0.2f sec "+ + "= %0.2f sec (%0.2fx)") % + ["#{c} (#{method})", parsing,rendering,tot,max/tot] +end + diff --git a/lib/maruku/tests/new_parser.rb b/lib/maruku/tests/new_parser.rb new file mode 100644 index 00000000..c2229552 --- /dev/null +++ b/lib/maruku/tests/new_parser.rb @@ -0,0 +1,359 @@ +#-- +# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org> +# +# This file is part of Maruku. +# +# Maruku is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# Maruku is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Maruku; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#++ + + +require 'maruku' + +module MaRuKu; module Tests + # 5 accented letters in italian, encoded as UTF-8 + AccIta8 = "\303\240\303\250\303\254\303\262\303\271" + + # Same letters, written in ISO-8859-1 (one byte per letter) + AccIta1 = "\340\350\354\362\371" + + # The word MA-RU-KU, written in katakana using UTF-8 + Maruku8 = "\343\203\236\343\203\253\343\202\257" + + def test_span_parser(verbose, break_on_first_error, quiet) + good_cases = [ + + ["", [], 'Empty string gives empty list'], + ["a", ["a"], 'Easy char'], + [" a", ["a"], 'First space in the paragraph is ignored'], + ["a\n \n", ["a"], 'Last spaces in the paragraphs are ignored'], + [' ', [], 'One char => nothing'], + [' ', [], 'Two chars => nothing'], + ['a b', ['a b'], 'Spaces are compressed'], + ['a b', ['a b'], 'Newlines are spaces'], + ["a\nb", ['a b'], 'Newlines are spaces'], + ["a\n b", ['a b'], 'Compress newlines 1'], + ["a \nb", ['a b'], 'Compress newlines 2'], + [" \nb", ['b'], 'Compress newlines 3'], + ["\nb", ['b'], 'Compress newlines 4'], + ["b\n", ['b'], 'Compress newlines 5'], + ["\n", [], 'Compress newlines 6'], + ["\n\n\n", [], 'Compress newlines 7'], + + [nil, :throw, "Should throw on nil input"], + + # Code blocks + ["`" , :throw, 'Unclosed single ticks'], + ["``" , :throw, 'Unclosed double ticks'], + ["`a`" , [md_code('a')], 'Simple inline code'], + ["`` ` ``" , [md_code('`')], ], + ["`` \\` ``" , [md_code('\\`')], ], + ["``a``" , [md_code('a')], ], + ["`` a ``" , [md_code('a')], ], + + # Newlines + ["a \n", ['a',md_el(:linebreak)], 'Two 
spaces give br.'], + ["a \n", ['a'], 'Newlines 2'], + [" \n", [md_el(:linebreak)], 'Newlines 3'], + [" \n \n", [md_el(:linebreak),md_el(:linebreak)],'Newlines 3'], + [" \na \n", [md_el(:linebreak),'a',md_el(:linebreak)],'Newlines 3'], + + # Inline HTML + ["a < b", ['a < b'], '< can be on itself'], + ["<hr>", [md_html('<hr />')], 'HR will be sanitized'], + ["<hr/>", [md_html('<hr />')], 'Closed tag is ok'], + ["<hr />", [md_html('<hr />')], 'Closed tag is ok 2'], + ["<hr/>a", [md_html('<hr />'),'a'], 'Closed tag is ok 2'], + ["<em></em>a", [md_html('<em></em>'),'a'], 'Inline HTML 1'], + ["<em>e</em>a", [md_html('<em>e</em>'),'a'], 'Inline HTML 2'], + ["a<em>e</em>b", ['a',md_html('<em>e</em>'),'b'], 'Inline HTML 3'], + ["<em>e</em>a<em>f</em>", + [md_html('<em>e</em>'),'a',md_html('<em>f</em>')], + 'Inline HTML 4'], + ["<em>e</em><em>f</em>a", + [md_html('<em>e</em>'),md_html('<em>f</em>'),'a'], + 'Inline HTML 5'], + + ["<img src='a' />", [md_html("<img src='a' />")], 'Attributes'], + ["<img src='a'/>"], + + # emphasis + ["**", :throw, 'Unclosed double **'], + ["\\*", ['*'], 'Escaping of *'], + ["a *b* ", ['a ', md_em('b')], 'Emphasis 1'], + ["a *b*", ['a ', md_em('b')], 'Emphasis 2'], + ["a * b", ['a * b'], 'Emphasis 3'], + ["a * b*", :throw, 'Unclosed emphasis'], + # same with underscore + ["__", :throw, 'Unclosed double __'], + ["\\_", ['_'], 'Escaping of _'], + ["a _b_ ", ['a ', md_em('b')], 'Emphasis 4'], + ["a _b_", ['a ', md_em('b')], 'Emphasis 5'], + ["a _ b", ['a _ b'], 'Emphasis 6'], + ["a _ b_", :throw, 'Unclosed emphasis'], + ["_b_", [md_em('b')], 'Emphasis 7'], + ["_b_ _c_", [md_em('b'),' ',md_em('c')], 'Emphasis 8'], + ["_b__c_", [md_em('b'),md_em('c')], 'Emphasis 9'], + # strong + ["**a*", :throw, 'Unclosed double ** 2'], + ["\\**a*", ['*', md_em('a')], 'Escaping of *'], + ["a **b** ", ['a ', md_strong('b')], 'Emphasis 1'], + ["a **b**", ['a ', md_strong('b')], 'Emphasis 2'], + ["a ** b", ['a ** b'], 'Emphasis 3'], + ["a ** b**", :throw, 'Unclosed 
emphasis'], + ["**b****c**", [md_strong('b'),md_strong('c')], 'Emphasis 9'], + # strong (with underscore) + ["__a_", :throw, 'Unclosed double __ 2'], + ["\\__a_", ['_', md_em('a')], 'Escaping of _'], + ["a __b__ ", ['a ', md_strong('b')], 'Emphasis 1'], + ["a __b__", ['a ', md_strong('b')], 'Emphasis 2'], + ["a __ b", ['a __ b'], 'Emphasis 3'], + ["a __ b__", :throw, 'Unclosed emphasis'], + ["__b____c__", [md_strong('b'),md_strong('c')], 'Emphasis 9'], + # extra strong + ["***a**", :throw, 'Unclosed triple *** '], + ["\\***a**", ['*', md_strong('a')], 'Escaping of *'], + ["a ***b*** ", ['a ', md_emstrong('b')], 'Strong elements'], + ["a ***b***", ['a ', md_emstrong('b')]], + ["a *** b", ['a *** b']], + ["a ** * b", ['a ** * b']], + ["***b******c***", [md_emstrong('b'),md_emstrong('c')]], + ["a *** b***", :throw, 'Unclosed emphasis'], + # same with underscores + ["___a__", :throw, 'Unclosed triple *** '], + ["\\___a__", ['_', md_strong('a')], 'Escaping of *'], + ["a ___b___ ", ['a ', md_emstrong('b')], 'Strong elements'], + ["a ___b___", ['a ', md_emstrong('b')]], + ["a ___ b", ['a ___ b']], + ["a __ _ b", ['a __ _ b']], + ["___b______c___", [md_emstrong('b'),md_emstrong('c')]], + ["a ___ b___", :throw, 'Unclosed emphasis'], + # mixing is bad + ["*a_", :throw, 'Mixing is bad'], + ["_a*", :throw], + ["**a__", :throw], + ["__a**", :throw], + ["___a***", :throw], + ["***a___", :throw], + # links of the form [text][ref] + ["\\[a]", ["[a]"], 'Escaping 1'], + ["\\[a\\]", ["[a]"], 'Escaping 2'], + ["[a]", ["a"], 'Not a link'], + ["[a][]", [ md_link(["a"],'')], 'Empty link'], + ["[a][]b", [ md_link(["a"],''),'b'], 'Empty link'], + ["[a\\]][]", [ md_link(["a]"],'')], 'Escape inside link'], + + ["[a", :throw, 'Link not closed'], + ["[a][", :throw, 'Ref not closed'], + + # links of the form [text](url) + ["\\[a](b)", ["[a](b)"], 'Links'], + ["[a](url)c", [md_im_link(['a'],'url'),'c'], 'url'], + ["[a]( url )c" ], + ["[a] ( url )c" ], + ["[a] ( url)c" ], + + ["[a](ur:/l/ 
'Title')", [md_im_link(['a'],'ur:/l/','Title')], + 'url and title'], + ["[a] ( ur:/l/ \"Title\")" ], + ["[a] ( ur:/l/ \"Title\")" ], + ["[a]( ur:/l/ Title)", :throw, "Must quote title" ], + + ["[a](url 'Tit\\\"l\\\\e')", [md_im_link(['a'],'url','Tit"l\\e')], + 'url and title escaped'], + ["[a] ( url \"Tit\\\"l\\\\e\")" ], + ["[a] ( url \"Tit\\\"l\\\\e\" )" ], + ['[a] ( url "Tit\\"l\\\\e" )' ], + ["[a]()", [md_im_link(['a'],'')], 'No URL is OK'], + + ["[a](\"Title\")", :throw, "No url specified" ], + ["[a](url \"Title)", :throw, "Unclosed quotes" ], + ["[a](url \"Title\\\")", :throw], + ["[a](url \"Title\" ", :throw], + + ["[a](url \'Title\")", :throw, "Mixing is bad" ], + ["[a](url \"Title\')"], + + ["[a](/url)", [md_im_link(['a'],'/url')], 'Funny chars in url'], + ["[a](#url)", [md_im_link(['a'],'#url')]], + ["[a](</script?foo=1&bar=2>)", [md_im_link(['a'],'/script?foo=1&bar=2')]], + + + # Images + ["\\![a](url)", ['!', md_im_link(['a'],'url') ], 'Escaping images'], + + ["![a](url)", [md_im_image(['a'],'url')], 'Image no title'], + ["![a]( url )" ], + ["![a] ( url )" ], + ["![a] ( url)" ], + + ["![a](url 'ti\"tle')", [md_im_image(['a'],'url','ti"tle')], 'Image with title'], + ['![a]( url "ti\\"tle")' ], + + ["![a](url", :throw, 'Invalid images'], + ["![a( url )" ], + ["![a] ('url )" ], + + ["![a][imref]", [md_image(['a'],'imref')], 'Image with ref'], + ["![a][ imref]"], + ["![a][ imref ]"], + ["![a][\timref\t]"], + + + ['<http://example.com/?foo=1&bar=2>', + [md_url('http://example.com/?foo=1&bar=2')], 'Immediate link'], + ['a<http://example.com/?foo=1&bar=2>b', + ['a',md_url('http://example.com/?foo=1&bar=2'),'b'] ], + ['<andrea@censi.org>', + [md_email('andrea@censi.org')], 'Email address'], + ['<mailto:andrea@censi.org>'], + ["Developmen <http://rubyforge.org/projects/maruku/>", + ["Developmen ", md_url("http://rubyforge.org/projects/maruku/")]], + ["a<!-- -->b", ['a',md_html('<!-- -->'),'b'], + 'HTML Comment'], + + ["a<!--", :throw, 'Bad HTML Comment'], + 
["a<!-- ", :throw, 'Bad HTML Comment'], + + ["<?xml <?!--!`3 ?>", [md_xml_instr('xml','<?!--!`3')], 'XML processing instruction'], + ["<? <?!--!`3 ?>", [md_xml_instr('','<?!--!`3')] ], + + ["<? ", :throw, 'Bad Server directive'], + + ["a <b", :throw, 'Bad HTML 1'], + ["<b", :throw, 'Bad HTML 2'], + ["<b!", :throw, 'Bad HTML 3'], + ['`<div>`, `<table>`, `<pre>`, `<p>`', + [md_code('<div>'),', ',md_code('<table>'),', ', + md_code('<pre>'),', ',md_code('<p>')], + 'Multiple HTLM tags'], + + ["&andrea", ["&andrea"], 'Parsing of entities'], +# no escaping is allowed +# ["\\&andrea;", ["&andrea;"]], + ["l&andrea;", ["l", md_entity('andrea')] ], + ["&&andrea;", ["&", md_entity('andrea')] ], + ["&123;;&",[md_entity('123'),';',md_entity('amp')]], + + ["a\nThe [syntax page] [s] provides", + ['a The ', md_link(['syntax page'],'s'), ' provides'], 'Regression'], + + ['![a](url "ti"tle")', [md_im_image(['a'],'url','ti"tle')], + "Image with quotes"], + ['![a](url \'ti"tle\')' ], + + ['[bar](/url/ "Title with "quotes" inside")', + [md_im_link(["bar"],'/url/', 'Title with "quotes" inside')], + "Link with quotes"], + + ['$20,000 and $30,000', ['$20,000 and $30,000'], 'Math: spaces'], + ['$20,000$', [md_inline_math('20,000')]], + ['$ 20,000$', ['$ 20,000$']], + ['$20,000 $ $20,000$', ['$20,000 $ ', md_inline_math('20,000')]], + ["#{Maruku8}", [Maruku8], "Reading UTF-8"], + ["#{AccIta1}", [AccIta8], "Converting ISO-8859-1 to UTF-8", + {:encoding => 'iso-8859-1'}], + + ] + + good_cases = unit_tests_for_attribute_lists + good_cases + + count = 1; last_comment=""; last_expected=:throw + good_cases.each do |t| + if not t[1] + t[1] = last_expected + else + last_expected = t[1] + end + if not t[2] + t[2] = last_comment + " #{count+=1}" + else + last_comment = t[2]; count=1 + end + end + + + + @verbose = verbose + m = Maruku.new + m.attributes[:on_error] = :raise + Globals[:debug_keep_ials] = true + + good_cases.each do |input, expected, comment| + output = nil + begin + output = 
# Width of the comment column in the status report. Shorter comments are
# padded with spaces up to this width so that the statuses line up.
PAD=40

# Prints one line of the test report in the form "- <comment> <status>".
#
# * +comment+ is the description of the test case; it is left-justified
#   in a column of PAD characters (longer comments are printed unchanged).
# * +status+ is the outcome text printed after the comment.
# * +verbose_text+ is extra detail printed after the status line, but only
#   when the @verbose flag is set and a text was actually given.
def print_status(comment, status, verbose_text=nil)
  # String#ljust returns the string unchanged when it is already PAD
  # characters or longer, which is what the manual padding used to do.
  puts "- #{comment.ljust(PAD)} #{status}"
  puts verbose_text if @verbose && verbose_text
end
class Maruku

  # Aborts the test run by raising a RuntimeError that describes the
  # failure.
  #
  # * +test+ is the Markdown source of the failing test case
  # * +doc+ is the document parsed from it; both its inspect form and its
  #   HTML rendering are included in the message
  # * +s+ is a short description of what went wrong
  def self.failed(test, doc, s)
    # The HTML rendering must be interpolated with "#{...}"; without the
    # leading "#" the message contained the literal text "{doc.to_html}".
    raise "Test failed: #{s}\n*****\n#{test}\n*****\n"+
      "#{doc.inspect}\n*****\n#{doc.to_html}"
  end

  # Parses every test case in MetaTests (the cases are separated by "***")
  # and checks that each one produces a document made of exactly one
  # child, and that this child is a header. Calls Maruku.failed, which
  # raises, on the first case that does not.
  #
  # NOTE(review): only the document structure is checked; the id, class
  # and style attributes written in the test cases are not verified.
  def self.metaTests
    MetaTests.split('***').each do |test|
      doc = Maruku.new(test)

      failed(test, doc, "children != 1") unless doc.children.size == 1

      h = doc.children[0]

      failed(test, doc, "child not header") unless h.node_type == :header
    end
  end

MetaTests = <<EOF

# Head # {ref1 ref2 ref3}

{ref1}: id: id1; class: class1
{ref2}: class: class2
{ref3}: style: "Style is : important = for all } things"

***

# Head # {ref1 ref3 ref2}

{ref1}: id: id1; class: class1
{ref2}: class: class2
{ref3}: style: "Style is : important = for all } things"

***

# Head # {ref1 ref2 ref3}

{ref1}: id= id1; class=class1
{ref2}: class=class2
{ref3}: style="Style is : important = for all } things"

***

# Head # {ref1 ref2 ref3}

{ref1}: id=id1 class=class1
{ref2}: class=class2
{ref3}: style="Style is : important = for all } things"

***
# Head # {ref1 ref2 ref3}

{ref1}: id:id1 class:class1
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***
# Head # {ref1 ref2 ref3}

{ref1}: id:id1 class:class1
 {ref2}: class : class2
 {ref3}: style = "Style is : important = for all } things"

***

# Head # {#id1 .class1 ref2 ref3}

{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***

# Head # { #id1 .class1 ref2 ref3 }

{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***

# Head # { id=id1 class=class1 ref2 ref3 }

{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"

***

# Head # { id:id1 class="class1" class:"class2" style="Style is : important = for all } things"}

EOF

end
# One node of the table of contents. The root node has no header; every
# other node points to the header element that opens the section.
class Section
  include REXML

  # a Fixnum, is == header_element.level
  attr_accessor :section_level

  # An array of fixnum, like [1,2,5] for Section 1.2.5
  attr_accessor :section_number

  # reference to header (header has h.meta[:section] to self)
  attr_accessor :header_element

  # Array of immediate children of this element
  attr_accessor :immediate_children

  # Array of Section inside this section
  attr_accessor :section_children

  def initialize
    @immediate_children = []
    @section_children = []
  end

  # Textual dump of this section followed by its subsections, one line
  # per section; +indent+ is the number of underscores that prefix the
  # line and grows by one at every nesting level.
  def inspect(indent=1)
    own_line =
      if @header_element
        "\_"*indent + "(#{@section_level})>\t #{@section_number.join('.')} : " +
          @header_element.children_to_s +
          " (id: '#{@header_element.attributes[:id]}')\n"
      else
        "Master\n"
      end

    @section_children.inject(own_line) do |dump, child|
      dump + child.inspect(indent+1)
    end
  end

  # Numerate this section and its children: this section receives +a+,
  # its n-th child receives +a+ with n appended. The number is also
  # stored in the :section_number attribute of the header, if any.
  def numerate(a=[])
    self.section_number = a
    section_children.each_with_index do |child, index|
      child.numerate(a.clone.push(index+1))
    end
    header = header_element
    header.attributes[:section_number] = section_number if header
  end

  # Creates an HTML toc: a div of class "maruku_toc" wrapping the nested
  # list built by create_toc.
  # Call this on the root
  def to_html
    wrapper = Element.new 'div'
    wrapper.attributes['class'] = 'maruku_toc'
    wrapper << create_toc
    wrapper
  end

  # Builds the <ul> element listing the subsections of this section,
  # recursing into the subsections that have children of their own.
  def create_toc
    list = Element.new 'ul'
    # let's remove the bullets
    list.attributes['style'] = 'list-style: none;'
    @section_children.each do |child|
      header = child.header_element
      item = Element.new 'li'
      number = header.render_section_number
      item << number if number
      link = header.wrap_as_element('a')
      link.delete_attribute 'id'
      link.attributes['href'] = "##{header.attributes[:id]}"
      item << link
      item << child.create_toc unless child.section_children.empty?
      list << item
    end
    list
  end

  # Creates a latex toc.
  # Call this on the root
  def to_latex
    to_latex_rec + "\n\n"
  end

  # Returns the LaTeX lines for the subsections of this section, each
  # followed by the lines of its own subsections.
  def to_latex_rec
    entries = @section_children.map do |child|
      header = child.header_element
      entry = "\\noindent"
      number = header.section_number
      entry += number if number
      text = header.children_to_latex
      id = header.attributes[:id]
      entry += "\\hyperlink{#{id}}{#{text}}"
      entry += "\\dotfill \\pageref*{#{id}} \\linebreak\n"
      entry += child.to_latex_rec if child.section_children.size>0
      entry
    end
    entries.join
  end

end
module Syntax

  # The tokenizer used for every syntax that has no implementation of its
  # own: it does no analysis and reports the text as one :normal token.
  class Default

    # Passes the whole of +text+ to the block as a single token.
    def tokenize( text )
      whole = Token.new( text, :normal )
      yield whole
    end

  end

  # Registry of syntax implementations, keyed by syntax name. Names that
  # were never registered map to the Default tokenizer class.
  SYNTAX = Hash.new( Default )

  module_function

  # Returns a new tokenizer for the requested syntax. The implementation
  # file "syntax/lang/<syntax>" is required first; when it cannot be
  # found the registry is consulted anyway, so an unknown syntax yields
  # a Default tokenizer instead of an error.
  def load( syntax )
    begin
      require "syntax/lang/#{syntax}"
    rescue LoadError
      # no implementation file: fall back to whatever is registered
    end
    SYNTAX[ syntax ].new
  end

  # Returns the names of the supported syntaxes, i.e. the base names of
  # the .rb files in the "syntax/lang" directory next to this file.
  def all
    pattern = File.join(File.dirname(__FILE__), "syntax", "lang", "*.rb")
    Dir.glob(pattern).map { |path| File.basename(path, ".rb") }
  end

end
# Base class of the tokenizers. It owns the scanner, drives the loop that
# calls #step until the text is consumed, and merges adjacent pieces of
# text of the same group so that they reach the client as one token.
class Tokenizer

  # The group currently being processed, and the text accumulated for it
  # so far.
  attr_reader :group, :chunk

  # Prepares for tokenizing +text+: resets the chunk and the group,
  # stores the block (invoked once per extracted token), creates the
  # scanner and finally calls #setup.
  def start( text, &block )
    @chunk = ""
    @group = :normal
    @callback = block
    @text = StringScanner.new( text )
    setup
  end

  # Hook for subclasses, called at the end of #start.
  def setup
  end

  # Ends the tokenization: the pending chunk is flushed to the client and
  # #teardown is called.
  def finish
    start_group nil
    teardown
  end

  # Hook for subclasses, called at the end of #finish.
  def teardown
  end

  # Performs one iteration of the tokenization; a single call may extract
  # several tokens. Subclasses must implement it.
  def step
    raise NotImplementedError, "subclasses must implement #step"
  end

  # Tokenizes +text+, calling #step until the scanner reaches the end of
  # the text. Every token is passed to the block.
  def tokenize( text, &block )
    start( text, &block )
    until @text.eos?
      step
    end
    finish
  end

  # Merges +opts+ into the options of this tokenizer. Which options are
  # understood, if any, depends on the concrete tokenizer.
  def set( opts={} )
    @options ||= Hash.new
    @options.update( opts )
  end

  # Returns the value of the option +opt+, or nil when no option was set.
  def option(opt)
    @options && @options[opt]
  end

  private

    EOL = /(?=\r\n?|\n|$)/

    # Defines a method +sym+ that forwards the call to the scanner.
    def self.delegate( sym )
      define_method( sym ) { |*a| @text.__send__( sym, *a ) }
    end

    [ :bol?, :eos?, :scan, :scan_until, :check, :check_until,
      :getch, :matched, :pre_match, :peek, :pos ].each do |scanner_method|
      delegate scanner_method
    end

    # Returns the n-th subgroup of the most recent match.
    def subgroup(n)
      @text[n]
    end

    # Adds +data+ to the chunk being accumulated.
    def append( data )
      @chunk << data
    end

    # Makes +gr+ the current group. When it differs from the group in
    # effect, the accumulated chunk (if not empty) is first yielded to
    # the client as a token and cleared; when it is the same group the
    # chunk keeps growing. A non-nil +data+ is then appended to the chunk.
    def start_group( gr, data=nil )
      flush_chunk unless gr == @group
      @group = gr
      @chunk << data if data
    end

    # Flushes the chunk, switches to group +gr+ and emits a :region_open
    # token carrying +data+ (or an empty string).
    def start_region( gr, data=nil )
      flush_chunk
      @group = gr
      opening = Token.new( data||"", @group, :region_open )
      @callback.call( opening )
    end

    # Flushes the chunk, switches to group +gr+ and emits a :region_close
    # token carrying +data+ (or an empty string).
    def end_region( gr, data=nil )
      flush_chunk
      @group = gr
      closing = Token.new( data||"", @group, :region_close )
      @callback.call( closing )
    end

    # Yields the accumulated chunk as a token of the current group,
    # unless it is empty, and starts a fresh chunk.
    def flush_chunk
      unless @chunk.empty?
        @callback.call( Token.new( @chunk, @group ) )
      end
      @chunk = ""
    end

    # Tokenizes +text+ with the tokenizer of another +syntax+, passing
    # along the options and the callback of this tokenizer.
    def subtokenize( syntax, text )
      tokenizer = Syntax.load( syntax )
      tokenizer.set @options if @options
      flush_chunk
      tokenizer.tokenize( text, &@callback )
    end

end
# Replaces the characters that are special in HTML (ampersand, angle
# brackets and double quote) with their corresponding named entities and
# returns the escaped copy; +string+ itself is not modified.
#
# The text is rewritten in a single pass, so the ampersands introduced by
# the entities themselves are never escaped a second time.
def html_escape( string )
  entity_names = { "&" => "amp", "<" => "lt", ">" => "gt", '"' => "quot" }
  string.gsub( /[&<>"]/ ) { |char| "&#{entity_names[char]};" }
end
# Step through a single iteration of the tokenization process.
#
# The text at the current scanner position is classified by a cascade of
# checks, from the most specific to the most generic: block comments and
# __END__ (only at the beginning of a line), then definitions, symbols,
# character literals, pseudo-constants and prefixed numbers, then the
# two-character prefixes (%-literals and heredoc openers), and finally a
# dispatch on the next single character.
#
# Two flags carry context from one step to the next:
# * @selector is true right after a single ".", so that the following
#   word is classified as an identifier even if it looks like a keyword
#   or a constant;
# * @allow_operator tells whether a "/" found now is the division
#   operator (true) or the start of a regular expression (false).
def step
  case
    when bol? && check( /=begin/ )
      start_group( :comment, scan_until( /^=end#{EOL}/ ) )
    when bol? && check( /__END__#{EOL}/ )
      start_group( :comment, scan_until( /\Z/ ) )
    else
      case
        when check( /def\s+/ )
          start_group :keyword, scan( /def\s+/ )
          start_group :method, scan_until( /(?=[;(\s]|#{EOL})/ )
        when check( /class\s+/ )
          start_group :keyword, scan( /class\s+/ )
          start_group :class, scan_until( /(?=[;\s<]|#{EOL})/ )
        when check( /module\s+/ )
          start_group :keyword, scan( /module\s+/ )
          start_group :module, scan_until( /(?=[;\s]|#{EOL})/ )
        when check( /::/ )
          start_group :punct, scan(/::/)
        when check( /:"/ )
          start_group :symbol, scan(/:/)
          scan_delimited_region :symbol, :symbol, "", true
          @allow_operator = true
        when check( /:'/ )
          start_group :symbol, scan(/:/)
          scan_delimited_region :symbol, :symbol, "", false
          @allow_operator = true
        when scan( /:[_a-zA-Z@$][$@\w]*[=!?]?/ )
          start_group :symbol, matched
          @allow_operator = true
        when scan( /\?(\\[^\n\r]|[^\\\n\r\s])/ )
          start_group :char, matched
          @allow_operator = true
        when check( /(__FILE__|__LINE__|true|false|nil|self)[?!]?/ )
          # after a "." or with a trailing ?/! this is a method name
          if @selector || matched[-1] == ?? || matched[-1] == ?!
            start_group :ident,
              scan(/(__FILE__|__LINE__|true|false|nil|self)[?!]?/)
          else
            start_group :constant,
              scan(/(__FILE__|__LINE__|true|false|nil|self)/)
          end
          @selector = false
          @allow_operator = true
        when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
          start_group :number, matched
          @allow_operator = true
        else
          case peek(2)
            when "%r"
              scan_delimited_region :punct, :regex, scan( /../ ), true
              @allow_operator = true
            when "%w", "%q"
              scan_delimited_region :punct, :string, scan( /../ ), false
              @allow_operator = true
            when "%s"
              scan_delimited_region :punct, :symbol, scan( /../ ), false
              @allow_operator = true
            when "%W", "%Q", "%x"
              scan_delimited_region :punct, :string, scan( /../ ), true
              @allow_operator = true
            when /%[^\sa-zA-Z0-9]/
              scan_delimited_region :punct, :string, scan( /./ ), true
              @allow_operator = true
            when "<<"
              # "<<" right after a word is taken as an operator, not as
              # the opening of a heredoc
              saw_word = ( chunk[-1,1] =~ /[\w!?]/ )
              start_group :punct, scan( /<</ )
              if saw_word
                @allow_operator = false
                return
              end

              float_right = scan( /-/ )
              append "-" if float_right
              if ( type = scan( /['"]/ ) )
                append type
                delim = scan_until( /(?=#{type})/ )
                if delim.nil?
                  append scan_until( /\Z/ )
                  return
                end
              else
                delim = scan( /\w+/ ) or return
              end
              start_group :constant, delim
              start_group :punct, scan( /#{type}/ ) if type
              # the body is scanned later, at the next end of line
              @heredocs << [ float_right, type, delim ]
              @allow_operator = true
            else
              case peek(1)
                when /[\n\r]/
                  unless @heredocs.empty?
                    scan_heredoc(*@heredocs.shift)
                  else
                    start_group :normal, scan( /\s+/ )
                  end
                  @allow_operator = false
                when /\s/
                  start_group :normal, scan( /\s+/ )
                when "#"
                  start_group :comment, scan( /#[^\n\r]*/ )
                when /[A-Z]/
                  start_group @selector ? :ident : :constant, scan( /\w+/ )
                  @allow_operator = true
                when /[a-z_]/
                  word = scan( /\w+[?!]?/ )
                  if !@selector && KEYWORDS.include?( word )
                    start_group :keyword, word
                    @allow_operator = false
                  else
                    # This used to be a bare "elsif", which made the
                    # start_group call the condition of the branch: the
                    # flag was set only because that call happens to
                    # return a true value.
                    start_group :ident, word
                    @allow_operator = true
                  end
                  @selector = false
                when /\d/
                  start_group :number,
                    scan( /[\d_]+(\.[\d_]+)?([eE][\d_]+)?/ )
                  @allow_operator = true
                when '"'
                  scan_delimited_region :punct, :string, "", true
                  @allow_operator = true
                when '/'
                  if @allow_operator
                    start_group :punct, scan(%r{/})
                    @allow_operator = false
                  else
                    scan_delimited_region :punct, :regex, "", true
                    @allow_operator = true
                  end
                when "'"
                  scan_delimited_region :punct, :string, "", false
                  @allow_operator = true
                when "."
                  dots = scan( /\.{1,3}/ )
                  start_group :punct, dots
                  @selector = ( dots.length == 1 )
                when /[@]/
                  start_group :attribute, scan( /@{1,2}\w*/ )
                  @allow_operator = true
                when /[$]/
                  start_group :global, scan(/\$/)
                  start_group :global, scan( /\w+|./ ) if check(/./)
                  @allow_operator = true
                when /[-!?*\/+=<>(\[\{}:;,&|%]/
                  start_group :punct, scan(/./)
                  @allow_operator = false
                when /[)\]]/
                  start_group :punct, scan(/./)
                  @allow_operator = true
                else
                  # all else just falls through this, to prevent
                  # infinite loops...
                  append getch
              end
          end
      end
  end
end
# Scan a delimited region of text: a quoted string, a %-literal, a regex
# or the body of a here-document.
#
# * +delim_group+ is the group used for the delimiters of the region
# * +inner_group+ is the group used for the contents of the region
# * +starter+ is the text of the starting delimiter already consumed by
#   the caller (may be empty)
# * +exprs+ tells whether the region is interpolated: if true, escape
#   sequences and "#" expressions inside it are recognized
# * +delim+ is the closing delimiter. If +nil+, the next character is
#   consumed as the opening delimiter and the closing one is derived from
#   it (the matching bracket for {, (, [ and <, else the same character).
# * +heredoc+ is +false+ for ordinary regions, <tt>:flush</tt> when the
#   delimiter of the heredoc must start the line, <tt>:float</tt> when it
#   may be preceded by whitespace.
#
# A region that is never closed extends to the end of the text.
def scan_delimited_region( delim_group, inner_group, starter, exprs,
  delim=nil, heredoc=false )
  if !delim
    start_group delim_group, starter
    delim = scan( /./ )
    append delim

    delim = case delim
      when '{' then '}'
      when '(' then ')'
      when '[' then ']'
      when '<' then '>'
      else delim
    end
  end

  start_region inner_group

  # Build the pattern of everything that interrupts plain content:
  # a backslash, the closing delimiter and, if interpolated, a "#"
  # followed by "$", "@", "@@" or "{".
  items = "\\\\|"
  if heredoc
    items << "(^"
    items << '\s*' if heredoc == :float
    items << "#{Regexp.escape(delim)}\s*?)#{EOL}"
  else
    items << "#{Regexp.escape(delim)}"
  end
  items << "|#(\\$|@@?|\\{)" if exprs
  items = Regexp.new( items )

  loop do
    p = pos
    match = scan_until( items )
    if match.nil?
      # unterminated region: the rest of the text is its content
      start_group inner_group, scan_until( /\Z/ )
      break
    else
      # the plain content that was skipped to reach the match
      text = pre_match[p..-1]
      start_group inner_group, text if text.length > 0
      case matched.strip
        when "\\"
          unless exprs
            # only \' and \\ are escapes in a non-interpolated region
            case peek(1)
              when "'"
                scan(/./)
                start_group :escape, "\\'"
              when "\\"
                scan(/./)
                start_group :escape, "\\\\"
              else
                start_group inner_group, "\\"
            end
          else
            start_group :escape, "\\"
            # getch returns nil when the backslash is the last character
            # of the text; appending nil to the chunk would raise
            c = getch
            append c if c
            case c
              when 'x'
                # "\x" not followed by a hexadecimal digit: scan is nil
                digits = scan( /[a-fA-F0-9]{1,2}/ )
                append digits if digits
              when /[0-7]/
                append scan( /[0-7]{0,2}/ )
            end
          end
        when delim
          end_region inner_group
          start_group delim_group, matched
          break
        when /^#/
          do_highlight = (option(:expressions) == :highlight)
          start_region :expr if do_highlight
          start_group :expr, matched
          case matched[1]
            when ?{
              # collect the expression up to the brace that balances
              # the opening one
              depth = 1
              content = ""
              while depth > 0
                p = pos
                c = scan_until( /[\{}]/ )
                if c.nil?
                  content << scan_until( /\Z/ )
                  break
                else
                  depth += ( matched == "{" ? 1 : -1 )
                  content << pre_match[p..-1]
                  content << matched if depth > 0
                end
              end
              if do_highlight
                subtokenize "ruby", content
                start_group :expr, "}"
              else
                append content + "}"
              end
            when ?$, ?@
              # no word follows for names such as "#$!": scan is nil
              name = scan( /\w+/ )
              append name if name
          end
          end_region :expr if do_highlight
        else raise "unexpected match on #{matched}"
      end
    end
  end
end
+ if scan(/<!--.*?(-->|\Z)/m) + start_group :comment, matched + else + case peek(1) + when "<" + start_group :punct, getch + case peek(1) + when "?" + append getch + when "/" + append getch + when "!" + append getch + end + start_group :normal, matched if scan( /\s+/ ) + if scan( /([-\w]+):([-\w]+)/ ) + start_group :namespace, subgroup(1) + start_group :punct, ":" + start_group :tag, subgroup(2) + elsif scan( /[-\w]+/ ) + start_group :tag, matched + end + @in_tag = true + when "&" + if scan( /&\S{1,10};/ ) + start_group :entity, matched + else + start_group :normal, scan( /&/ ) + end + end + end + else + append scan_until( /\Z/ ) + end + end + + private + + # Scan the string starting at the current position, with the given + # delimiter character. + def scan_string( delim ) + start_group :punct, delim + match = /(?=[&\\]|#{delim})/ + loop do + break unless ( text = scan_until( match ) ) + start_group :string, text unless text.empty? + case peek(1) + when "&" + if scan( /&\S{1,10};/ ) + start_group :entity, matched + else + start_group :string, getch + end + when "\\" + start_group :string, getch + append getch || "" + when delim + start_group :punct, getch + break + end + end + end + + end + + SYNTAX["xml"] = XML + +end diff --git a/lib/syntax/lang/yaml.rb b/lib/syntax/lang/yaml.rb new file mode 100644 index 00000000..53b052db --- /dev/null +++ b/lib/syntax/lang/yaml.rb @@ -0,0 +1,105 @@ +require 'syntax' + +module Syntax + + # A simple implementation of an YAML lexer. It handles most cases. It is + # not a validating lexer. + class YAML < Tokenizer + + # Step through a single iteration of the tokenization process. This will + # yield (potentially) many tokens, and possibly zero tokens. + def step + if bol? 
+ case + when scan(/---(\s*.+)?$/) + start_group :document, matched + when scan(/(\s*)([a-zA-Z][-\w]*)(\s*):/) + start_group :normal, subgroup(1) + start_group :key, subgroup(2) + start_group :normal, subgroup(3) + start_group :punct, ":" + when scan(/(\s*)-/) + start_group :normal, subgroup(1) + start_group :punct, "-" + when scan(/\s*$/) + start_group :normal, matched + when scan(/#.*$/) + start_group :comment, matched + else + append getch + end + else + case + when scan(/[\n\r]+/) + start_group :normal, matched + when scan(/[ \t]+/) + start_group :normal, matched + when scan(/!+(.*?^)?\S+/) + start_group :type, matched + when scan(/&\S+/) + start_group :anchor, matched + when scan(/\*\S+/) + start_group :ref, matched + when scan(/\d\d:\d\d:\d\d/) + start_group :time, matched + when scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/) + start_group :date, matched + when scan(/['"]/) + start_group :punct, matched + scan_string matched + when scan(/:\w+/) + start_group :symbol, matched + when scan(/[:]/) + start_group :punct, matched + when scan(/#.*$/) + start_group :comment, matched + when scan(/>-?/) + start_group :punct, matched + start_group :normal, scan(/.*$/) + append getch until eos? || bol? + return if eos? + indent = check(/ */) + start_group :string + loop do + line = check_until(/[\n\r]|\Z/) + break if line.nil? + if line.chomp.length > 0 + this_indent = line.chomp.match( /^\s*/ )[0] + break if this_indent.length < indent.length + end + append scan_until(/[\n\r]|\Z/) + end + else + start_group :normal, scan_until(/(?=$|#)/) + end + end + end + + private + + def scan_string( delim ) + regex = /(?=[#{delim=="'" ? "" : "\\\\"}#{delim}])/ + loop do + text = scan_until( regex ) + if text.nil? + start_group :string, scan_until( /\Z/ ) + break + else + start_group :string, text unless text.empty? 
+ end + + case peek(1) + when "\\" + start_group :expr, scan(/../) + else + start_group :punct, getch + break + end + end + end + + end + + SYNTAX["yaml"] = YAML + +end diff --git a/lib/syntax/version.rb b/lib/syntax/version.rb new file mode 100644 index 00000000..d5330468 --- /dev/null +++ b/lib/syntax/version.rb @@ -0,0 +1,9 @@ +module Syntax + module Version + MAJOR=1 + MINOR=0 + TINY=0 + + STRING=[MAJOR,MINOR,TINY].join('.') + end +end diff --git a/log/production.log b/log/production.log new file mode 100644 index 00000000..8b84199c --- /dev/null +++ b/log/production.log @@ -0,0 +1,3 @@ +# Logfile created on Mon Jan 22 07:45:04 CST 2007 by logger.rb/1.5.2.7 +Migrating to Beta1Schema (1) +Migrating to Beta2ChangesBulk (2) diff --git a/public/stylesheets/instiki.css b/public/stylesheets/instiki.css index e676f3b9..2d6aedf3 100644 --- a/public/stylesheets/instiki.css +++ b/public/stylesheets/instiki.css @@ -317,4 +317,9 @@ div.errorExplanation p,div.errorExplanation li { border:none; margin:0; padding:0; -} \ No newline at end of file +} + +merror {display:inline;font-size:1em;} +math[display=block] {overflow:auto;} +math { white-space: nowrap } +.maruku-eq-number {float:right}