Moved Maruku (and its dependencies) and XHTMLDiff (and its dependencies) to vendor/plugins/ .

Synced with Instiki SVN.
This commit is contained in:
Jacques Distler 2007-02-10 23:03:15 -06:00
parent 64037c67ac
commit 63e217bcfd
59 changed files with 40 additions and 1 deletions

1105
vendor/plugins/diff/lib/diff/lcs.rb vendored Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,21 @@
#!/usr/bin/env ruby
#--
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
# adapted from:
# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
# implements McIlroy-Hunt diff algorithm
#
# This program is free software. It may be redistributed and/or modified under
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
# Ruby licence.
#
# $Id: array.rb,v 1.3 2004/08/08 20:33:09 austin Exp $
#++
# Includes Diff::LCS into the Array built-in class.
require 'diff/lcs'
# Mixes the Diff::LCS module into the built-in Array class.
Array.class_eval { include Diff::LCS }

View file

@ -0,0 +1,51 @@
#!/usr/bin/env ruby
#--
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
# adapted from:
# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
# implements McIlroy-Hunt diff algorithm
#
# This program is free software. It may be redistributed and/or modified under
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
# Ruby licence.
#
# $Id: block.rb,v 1.3 2004/08/08 20:33:09 austin Exp $
#++
# Contains Diff::LCS::Block for bin/ldiff.
# A block is an operation removing, adding, or changing a group of items.
# Basically, this is just a list of changes, where each change adds or
# deletes a single item. Used by bin/ldiff.
class Diff::LCS::Block
  # +changes+ holds every item of the chunk in order; +insert+ and +remove+
  # hold the subsets that answer true to #adding? and #deleting?.
  attr_reader :changes, :insert, :remove

  # Builds the block from +chunk+, which must respond to #each and yield
  # change items responding to #deleting? and #adding?.
  def initialize(chunk)
    @changes = []
    chunk.each { |change| @changes.push(change) }

    @insert = @changes.select { |change| change.adding? }
    @remove = @changes.select { |change| change.deleting? }
  end

  # Net change in item count contributed by this block: number of inserted
  # items minus number of removed items.
  def diff_size
    @insert.size - @remove.size
  end

  # One-character operation code for the block:
  # '!' when it both removes and inserts, '-' when it only removes,
  # '+' when it only inserts, and '^' when it does neither.
  def op
    removing = !@remove.empty?
    adding = !@insert.empty?

    if removing && adding
      '!'
    elsif removing
      '-'
    elsif adding
      '+'
    else
      '^'
    end
  end
end

View file

@ -0,0 +1,322 @@
#!/usr/bin/env ruby
#--
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
# adapted from:
# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
# implements McIlroy-Hunt diff algorithm
#
# This program is free software. It may be redistributed and/or modified under
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
# Ruby licence.
#
# $Id: callbacks.rb,v 1.4 2004/09/14 18:51:26 austin Exp $
#++
# Contains definitions for all default callback objects.
require 'diff/lcs/change'
module Diff::LCS
  # The default callback object. Every event handler simply hands back the
  # event it received. #finished_a and #finished_b are deliberately not
  # implemented here.
  #
  # The class is meant to be passed around as is, without instantiation:
  #
  #   Diff::LCS.LCS(seq1, seq2, Diff::LCS::DefaultCallbacks)
  class DefaultCallbacks
    # Invoked when two items match; returns +event+ unchanged.
    def self.match(event)
      event
    end

    # Invoked when the old value is discarded in favour of the new value;
    # returns +event+ unchanged.
    def self.discard_a(event)
      event
    end

    # Invoked when the new value is discarded in favour of the old value;
    # returns +event+ unchanged.
    def self.discard_b(event)
      event
    end

    # Invoked when both the old and new values have changed; returns +event+
    # unchanged.
    def self.change(event)
      event
    end

    # All handlers live on the class itself, so .new is made private.
    private_class_method :new
  end

  # Alias of DefaultCallbacks used by Diff::LCS#traverse_sequences.
  #
  #   Diff::LCS.LCS(seq1, seq2, Diff::LCS::SequenceCallbacks)
  SequenceCallbacks = DefaultCallbacks

  # Alias of DefaultCallbacks used by Diff::LCS#traverse_balanced.
  #
  #   Diff::LCS.LCS(seq1, seq2, Diff::LCS::BalancedCallbacks)
  BalancedCallbacks = DefaultCallbacks
end
# This will produce a compound array of simple diff change objects. Each
# element in the #diffs array is a +hunk+ or +hunk+ array, where each
# element in each +hunk+ array is a single Change object representing the
# addition or removal of a single element from one of the two tested
# sequences. The +hunk+ provides the full context for the changes.
#
# diffs = Diff::LCS.diff(seq1, seq2)
# # This example shows a simplified array format.
# # [ [ [ '-', 0, 'a' ] ], # 1
# # [ [ '+', 2, 'd' ] ], # 2
# # [ [ '-', 4, 'h' ], # 3
# # [ '+', 4, 'f' ] ],
# # [ [ '+', 6, 'k' ] ], # 4
# # [ [ '-', 8, 'n' ], # 5
# # [ '-', 9, 'p' ],
# # [ '+', 9, 'r' ],
# # [ '+', 10, 's' ],
# # [ '+', 11, 't' ] ] ]
#
# There are five hunks here. The first hunk says that the +a+ at position 0
# of the first sequence should be deleted (<tt>'-'</tt>). The second hunk
# says that the +d+ at position 2 of the second sequence should be inserted
# (<tt>'+'</tt>). The third hunk says that the +h+ at position 4 of the
# first sequence should be removed and replaced with the +f+ from position 4
# of the second sequence. The other two hunks are described similarly.
#
# === Use
# This callback object must be initialised and is used by the Diff::LCS#diff
# method.
#
# cbo = Diff::LCS::DiffCallbacks.new
# Diff::LCS.LCS(seq1, seq2, cbo)
# cbo.finish
#
# Note that the call to #finish is absolutely necessary, or the last set of
# changes will not be visible. Alternatively, can be used as:
#
# cbo = Diff::LCS::DiffCallbacks.new { |tcbo| Diff::LCS.LCS(seq1, seq2, tcbo) }
#
# The necessary #finish call will be made.
#
# === Simplified Array Format
# The simplified array format used in the example above can be obtained
# with:
#
# require 'pp'
# pp diffs.map { |e| e.map { |f| f.to_a } }
class Diff::LCS::DiffCallbacks
  # The list of completed hunks gathered so far; each hunk is an array of
  # Diff::LCS::Change objects.
  attr_reader :diffs

  # Starts with no completed hunks and an empty pending hunk. When a block
  # is given, it is called with this object and #finish is guaranteed to run
  # afterwards, even if the block raises.
  def initialize # :yields self:
    @diffs = []
    @hunk = []
    return unless block_given?

    begin
      yield self
    ensure
      finish
    end
  end

  # Closes the diff: a pending hunk that still holds changes is moved onto
  # the #diffs list.
  def finish
    add_nonempty_hunk
  end

  # A matching pair ends the current run of changes, so the pending hunk (if
  # it holds anything) is closed.
  def match(event)
    add_nonempty_hunk
  end

  # Records the removal ('-') of the old element at its old position.
  def discard_a(event)
    removal = Diff::LCS::Change.new('-', event.old_position, event.old_element)
    @hunk << removal
  end

  # Records the addition ('+') of the new element at its new position.
  def discard_b(event)
    addition = Diff::LCS::Change.new('+', event.new_position, event.new_element)
    @hunk << addition
  end

  private

  # Moves the pending hunk onto #diffs unless it is empty, and starts a
  # fresh pending hunk, which is also the return value.
  def add_nonempty_hunk
    pending, @hunk = @hunk, []
    @diffs << pending unless pending.empty?
    @hunk
  end
end
# This will produce a compound array of contextual diff change objects. Each
# element in the #diffs array is a "hunk" array, where each element in each
# "hunk" array is a single change. Each change is a Diff::LCS::ContextChange
# that contains both the old index and new index values for the change. The
# "hunk" provides the full context for the changes. Both old and new objects
# will be presented for changed objects. +nil+ will be substituted for a
# discarded object.
#
# seq1 = %w(a b c e h j l m n p)
# seq2 = %w(b c d e f j k l m r s t)
#
# diffs = Diff::LCS.diff(seq1, seq2, Diff::LCS::ContextDiffCallbacks)
# # This example shows a simplified array format.
# # [ [ [ '-', [ 0, 'a' ], [ 0, nil ] ] ], # 1
# # [ [ '+', [ 3, nil ], [ 2, 'd' ] ] ], # 2
# # [ [ '-', [ 4, 'h' ], [ 4, nil ] ], # 3
# # [ '+', [ 5, nil ], [ 4, 'f' ] ] ],
# # [ [ '+', [ 6, nil ], [ 6, 'k' ] ] ], # 4
# # [ [ '-', [ 8, 'n' ], [ 9, nil ] ], # 5
# # [ '+', [ 9, nil ], [ 9, 'r' ] ],
# # [ '-', [ 9, 'p' ], [ 10, nil ] ],
# # [ '+', [ 10, nil ], [ 10, 's' ] ],
# # [ '+', [ 10, nil ], [ 11, 't' ] ] ] ]
#
# The five hunks shown are comprised of individual changes; if there is a
# related set of changes, they are still shown individually.
#
# This callback can also be used with Diff::LCS#sdiff, which will produce
# results like:
#
# diffs = Diff::LCS.sdiff(seq1, seq2, Diff::LCS::ContextDiffCallbacks)
# # This example shows a simplified array format.
# # [ [ [ "-", [ 0, "a" ], [ 0, nil ] ] ], # 1
# # [ [ "+", [ 3, nil ], [ 2, "d" ] ] ], # 2
# # [ [ "!", [ 4, "h" ], [ 4, "f" ] ] ], # 3
# # [ [ "+", [ 6, nil ], [ 6, "k" ] ] ], # 4
# # [ [ "!", [ 8, "n" ], [ 9, "r" ] ], # 5
# # [ "!", [ 9, "p" ], [ 10, "s" ] ],
# # [ "+", [ 10, nil ], [ 11, "t" ] ] ] ]
#
# The five hunks are still present, but are significantly shorter in total
# presentation, because changed items are shown as changes ("!") instead of
# potentially "mismatched" pairs of additions and deletions.
#
# The result of this operation is similar to that of
# Diff::LCS::SDiffCallbacks. They may be compared as:
#
# s = Diff::LCS.sdiff(seq1, seq2).reject { |e| e.action == "=" }
# c = Diff::LCS.sdiff(seq1, seq2, Diff::LCS::ContextDiffCallbacks).flatten
#
# s == c # -> true
#
# === Use
# This callback object must be initialised and can be used by the
# Diff::LCS#diff or Diff::LCS#sdiff methods.
#
# cbo = Diff::LCS::ContextDiffCallbacks.new
# Diff::LCS.LCS(seq1, seq2, cbo)
# cbo.finish
#
# Note that the call to #finish is absolutely necessary, or the last set of
# changes will not be visible. Alternatively, can be used as:
#
# cbo = Diff::LCS::ContextDiffCallbacks.new { |tcbo| Diff::LCS.LCS(seq1, seq2, tcbo) }
#
# The necessary #finish call will be made.
#
# === Simplified Array Format
# The simplified array format used in the example above can be obtained
# with:
#
# require 'pp'
# pp diffs.map { |e| e.map { |f| f.to_a } }
class Diff::LCS::ContextDiffCallbacks < Diff::LCS::DiffCallbacks
  # Adds the simplified contextual form of a discarded old element to the
  # pending hunk.
  def discard_a(event)
    simplified = Diff::LCS::ContextChange.simplify(event)
    @hunk << simplified
  end

  # Adds the simplified contextual form of a discarded new element to the
  # pending hunk.
  def discard_b(event)
    simplified = Diff::LCS::ContextChange.simplify(event)
    @hunk << simplified
  end

  # Adds the simplified contextual form of a changed pair to the pending
  # hunk.
  def change(event)
    simplified = Diff::LCS::ContextChange.simplify(event)
    @hunk << simplified
  end
end
# This will produce a simple array of diff change objects. Each element in
# the #diffs array is a single ContextChange. In the set of #diffs provided
# by SDiffCallbacks, both old and new objects will be presented for both
# changed <strong>and unchanged</strong> objects. +nil+ will be substituted
# for a discarded object.
#
# The diffset produced by this callback, when provided to Diff::LCS#sdiff,
# will compute and display the necessary components to show two sequences
# and their minimized differences side by side, just like the Unix utility
# +sdiff+.
#
# same same
# before | after
# old < -
# - > new
#
# seq1 = %w(a b c e h j l m n p)
# seq2 = %w(b c d e f j k l m r s t)
#
# diffs = Diff::LCS.sdiff(seq1, seq2)
# # This example shows a simplified array format.
# # [ [ "-", [ 0, "a"], [ 0, nil ] ],
# # [ "=", [ 1, "b"], [ 0, "b" ] ],
# # [ "=", [ 2, "c"], [ 1, "c" ] ],
# # [ "+", [ 3, nil], [ 2, "d" ] ],
# # [ "=", [ 3, "e"], [ 3, "e" ] ],
# # [ "!", [ 4, "h"], [ 4, "f" ] ],
# # [ "=", [ 5, "j"], [ 5, "j" ] ],
# # [ "+", [ 6, nil], [ 6, "k" ] ],
# # [ "=", [ 6, "l"], [ 7, "l" ] ],
# # [ "=", [ 7, "m"], [ 8, "m" ] ],
# # [ "!", [ 8, "n"], [ 9, "r" ] ],
# # [ "!", [ 9, "p"], [ 10, "s" ] ],
# # [ "+", [ 10, nil], [ 11, "t" ] ] ]
#
# The result of this operation is similar to that of
# Diff::LCS::ContextDiffCallbacks. They may be compared as:
#
# s = Diff::LCS.sdiff(seq1, seq2).reject { |e| e.action == "=" }
# c = Diff::LCS.sdiff(seq1, seq2, Diff::LCS::ContextDiffCallbacks).flatten
#
# s == c # -> true
#
# === Use
# This callback object must be initialised and is used by the Diff::LCS#sdiff
# method.
#
# cbo = Diff::LCS::SDiffCallbacks.new
# Diff::LCS.LCS(seq1, seq2, cbo)
#
# As with the other initialisable callback objects, Diff::LCS::SDiffCallbacks
# can be initialised with a block. As there is no "finishing" to be done,
# this has no effect on the state of the object.
#
# cbo = Diff::LCS::SDiffCallbacks.new { |tcbo| Diff::LCS.LCS(seq1, seq2, tcbo) }
#
# === Simplified Array Format
# The simplified array format used in the example above can be obtained
# with:
#
# require 'pp'
# pp diffs.map { |e| e.to_a }
class Diff::LCS::SDiffCallbacks
  # The flat list of simplified Diff::LCS::ContextChange objects gathered so
  # far, one per event received.
  attr_reader :diffs

  # Starts with an empty #diffs list; when a block is given it is called
  # with this object.
  def initialize #:yields self:
    @diffs = []
    if block_given?
      yield(self)
    end
  end

  # Records a matching pair.
  def match(event)
    entry = Diff::LCS::ContextChange.simplify(event)
    @diffs << entry
  end

  # Records a discarded old element.
  def discard_a(event)
    entry = Diff::LCS::ContextChange.simplify(event)
    @diffs << entry
  end

  # Records a discarded new element.
  def discard_b(event)
    entry = Diff::LCS::ContextChange.simplify(event)
    @diffs << entry
  end

  # Records a changed pair.
  def change(event)
    entry = Diff::LCS::ContextChange.simplify(event)
    @diffs << entry
  end
end

View file

@ -0,0 +1,169 @@
#!/usr/bin/env ruby
#--
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
# adapted from:
# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
# implements McIlroy-Hunt diff algorithm
#
# This program is free software. It may be redistributed and/or modified under
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
# Ruby licence.
#
# $Id: change.rb,v 1.4 2004/08/08 20:33:09 austin Exp $
#++
# Provides Diff::LCS::Change and Diff::LCS::ContextChange.
# Centralises the change test code in Diff::LCS::Change and
# Diff::LCS::ContextChange, since it's the same for both classes.
module Diff::LCS::ChangeTypeTests
  # All predicates inspect the +@action+ instance variable of the including
  # object (set by Diff::LCS::Change and Diff::LCS::ContextChange in their
  # initializers).

  # True when the action is '-' (an element is being deleted).
  def deleting?
    @action == '-'
  end

  # True when the action is '+' (an element is being added).
  def adding?
    @action == '+'
  end

  # True when the action is '=' (the element is unchanged).
  def unchanged?
    @action == '='
  end

  # True when the action is '!' (the element was changed).
  #
  # This used to test +@changed+, an instance variable that the including
  # classes never assign, so the predicate could never be true.
  def changed?
    @action == '!'
  end

  # True when the action is '>' (the first sequence is exhausted).
  # Previously tested the never-assigned +@changed+.
  def finished_a?
    @action == '>'
  end

  # True when the action is '<' (the second sequence is exhausted).
  # Previously tested the never-assigned +@changed+.
  def finished_b?
    @action == '<'
  end
end
# Represents a simplistic (non-contextual) change. Represents the removal or
# addition of an element from either the old or the new sequenced enumerable.
class Diff::LCS::Change
  include Comparable
  include Diff::LCS::ChangeTypeTests

  # +action+ is the kind of change: '+' (#adding?), '-' (#deleting?),
  # '=' (#unchanged?) or '!' (#changed?). When created by Diff::LCS#diff or
  # Diff::LCS#sdiff, it may also be '>' (#finished_a?) or '<'
  # (#finished_b?). +position+ and +element+ are stored exactly as given to
  # the constructor.
  attr_reader :action, :position, :element

  # Stores the three fields without validation.
  def initialize(action, position, element)
    @action, @position, @element = action, position, element
  end

  # Creates a Change from an array produced by Change#to_a, i.e.
  # <tt>[action, position, element]</tt>.
  def self.from_a(arr)
    action, position, element = arr[0], arr[1], arr[2]
    Diff::LCS::Change.new(action, position, element)
  end

  # Returns <tt>[action, position, element]</tt>.
  def to_a
    [@action, @position, @element]
  end

  # Two changes are equal when action, position and element are all equal.
  # +other+ must respond to #action, #position and #element.
  def ==(other)
    action == other.action &&
      position == other.position &&
      element == other.element
  end

  # Orders by action first, then position, then element; the first
  # non-zero comparison decides.
  def <=>(other)
    order = action <=> other.action
    return order unless order.zero?

    order = position <=> other.position
    return order unless order.zero?

    element <=> other.element
  end
end
# Represents a contextual change. Contains the position and values of the
# elements in the old and the new sequenced enumerables as well as the action
# taken.
class Diff::LCS::ContextChange
  include Comparable
  include Diff::LCS::ChangeTypeTests

  # +action+ is the kind of change: '+' (#adding?), '-' (#deleting?),
  # '=' (#unchanged?) or '!' (#changed?). When created by Diff::LCS#diff or
  # Diff::LCS#sdiff, it may also be '>' (#finished_a?) or '<'
  # (#finished_b?). The remaining readers expose the position and element on
  # the old and on the new side, exactly as given to the constructor.
  attr_reader :action
  attr_reader :old_position, :old_element
  attr_reader :new_position, :new_element

  # Stores the five fields without validation.
  def initialize(action, old_position, old_element, new_position, new_element)
    @action = action
    @old_position, @old_element = old_position, old_element
    @new_position, @new_element = new_position, new_element
  end

  # Creates a ContextChange from an array produced by ContextChange#to_a
  # (<tt>[action, [old_position, old_element], [new_position,
  # new_element]]</tt>). A flat five-element array
  # <tt>[action, old_position, old_element, new_position, new_element]</tt>
  # is accepted as well.
  def self.from_a(arr)
    if arr.size == 5
      action, old_pos, old_el, new_pos, new_el = arr[0], arr[1], arr[2], arr[3], arr[4]
    else
      action = arr[0]
      old_pos, old_el = arr[1][0], arr[1][1]
      new_pos, new_el = arr[2][0], arr[2][1]
    end
    Diff::LCS::ContextChange.new(action, old_pos, old_el, new_pos, new_el)
  end

  # Simplifies a context change for use in some diff callbacks. '<' actions
  # are converted to '-' and '>' actions are converted to '+'. For a
  # deletion the new element is blanked to +nil+; for an addition the old
  # element is blanked to +nil+. Other actions pass through untouched.
  def self.simplify(event)
    fields = event.to_a

    # First fold the "finished" markers onto plain deletion/addition ...
    case fields[0]
    when '<' then fields[0] = '-'
    when '>' then fields[0] = '+'
    end

    # ... then blank the element on the side that does not take part.
    case fields[0]
    when '-' then fields[2][1] = nil
    when '+' then fields[1][1] = nil
    end

    Diff::LCS::ContextChange.from_a(fields)
  end

  # Returns <tt>[action, [old_position, old_element], [new_position,
  # new_element]]</tt>.
  def to_a
    old_side = [@old_position, @old_element]
    new_side = [@new_position, @new_element]
    [@action, old_side, new_side]
  end

  # Compact one-line description showing the action, both positions and
  # both elements. Any arguments are ignored.
  def inspect(*args)
    %Q(#<#{self.class.name}:#{__id__} @action=#{action} positions=#{old_position},#{new_position} elements=#{old_element.inspect},#{new_element.inspect}>)
  end

  # Two context changes are equal when all five fields are equal. +other+
  # must respond to the same five readers.
  def ==(other)
    @action == other.action &&
      @old_position == other.old_position &&
      @new_position == other.new_position &&
      @old_element == other.old_element &&
      @new_element == other.new_element
  end

  # Orders by action, old position, new position, old element and new
  # element, in that order; the first non-zero comparison decides.
  def <=>(other)
    order = @action <=> other.action
    return order unless order.zero?

    order = @old_position <=> other.old_position
    return order unless order.zero?

    order = @new_position <=> other.new_position
    return order unless order.zero?

    order = @old_element <=> other.old_element
    return order unless order.zero?

    @new_element <=> other.new_element
  end
end

257
vendor/plugins/diff/lib/diff/lcs/hunk.rb vendored Normal file
View file

@ -0,0 +1,257 @@
#!/usr/bin/env ruby
#--
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
# adapted from:
# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
# implements McIlroy-Hunt diff algorithm
#
# This program is free software. It may be redistributed and/or modified under
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
# Ruby licence.
#
# $Id: hunk.rb,v 1.2 2004/08/08 20:33:09 austin Exp $
#++
# Contains Diff::LCS::Hunk for bin/ldiff.
require 'diff/lcs/block'
# A Hunk is a group of Blocks which overlap because of the context
# surrounding each block. (So if we're not using context, every hunk will
# contain one block.) Used in the diff program (bin/ldiff).
class Diff::LCS::Hunk
# Creates a hunk holding a single Block built from +piece+.
#
# data_old, data_new:: the two sequences being compared; they are indexed
# with the positions stored in the change items.
# piece:: the group of change items handed to Diff::LCS::Block.new.
# context:: number of context items to add around the hunk (see
# #flag_context=); +nil+ or zero adds none.
# file_length_difference:: the running length difference between the two
# sequences before this hunk. The value updated for this hunk is available
# afterwards through #file_length_difference.
def initialize(data_old, data_new, piece, context, file_length_difference)
# At first, a hunk will have just one Block in it
@blocks = [ Diff::LCS::Block.new(piece) ]
@data_old = data_old
@data_new = data_new
# +before+ is the length difference ahead of this hunk, +after+ the
# difference once this block's insertions and removals are counted.
before = after = file_length_difference
after += @blocks[0].diff_size
@file_length_difference = after # The caller must get this manually
# Save the start & end of each array. If the array doesn't exist
# (e.g., we're only adding items in this block), then figure out the
# line number based on the line number of the other file and the
# current difference in file lengths.
if @blocks[0].remove.empty?
a1 = a2 = nil
else
a1 = @blocks[0].remove[0].position
a2 = @blocks[0].remove[-1].position
end
if @blocks[0].insert.empty?
b1 = b2 = nil
else
b1 = @blocks[0].insert[0].position
b2 = @blocks[0].insert[-1].position
end
# Fall back to the other side's position, shifted by the length difference,
# whenever this side has no items of its own.
@start_old = a1 || (b1 - before)
@start_new = b1 || (a1 + before)
@end_old = a2 || (b2 - after)
@end_new = b2 || (a2 + after)
self.flag_context = context
end
attr_reader :blocks
attr_reader :start_old, :start_new
attr_reader :end_old, :end_new
attr_reader :file_length_difference
# Change the "start" and "end" fields to note that context should be added
# to this hunk
# NOTE(review): attr_accessor also defines a reader for @flag_context, but
# nothing in this class assigns that variable, so the reader returns nil.
# The generated writer is replaced by the explicit definition below.
attr_accessor :flag_context
# Widens the hunk by up to +context+ items on each side. The start is not
# moved below zero; the amount added at the end is capped using
# @data_old.size. Does nothing when +context+ is nil or zero.
def flag_context=(context) #:nodoc:
return if context.nil? or context.zero?
add_start = (context > @start_old) ? @start_old : context
@start_old -= add_start
@start_new -= add_start
if (@end_old + context) > @data_old.size
add_end = @data_old.size - @end_old
else
add_end = context
end
@end_old += add_end
@end_new += add_end
end
# Merges +hunk+ into the front of this hunk: this hunk takes over the other
# hunk's start positions and its blocks are prepended to #blocks.
def unshift(hunk)
@start_old = hunk.start_old
@start_new = hunk.start_new
blocks.unshift(*hunk.blocks)
end
# Is there an overlap between hunk arg0 and old hunk arg1? Note: if end
# of old hunk is one less than beginning of second, they overlap
# Returns nil when no hunk is given.
def overlaps?(hunk = nil)
return nil if hunk.nil?
a = (@start_old - hunk.end_old) <= 1
b = (@start_new - hunk.end_new) <= 1
return (a or b)
end
# Renders the hunk in the requested +format+: :old, :unified, :context,
# :reverse_ed or :ed_finish return a String. :ed returns the hunk itself
# (not a String). Any other format raises a RuntimeError.
def diff(format)
case format
when :old
old_diff
when :unified
unified_diff
when :context
context_diff
when :ed
self
when :reverse_ed, :ed_finish
ed_diff(format)
else
raise "Unknown diff format #{format}."
end
end
# Yields each item of the old data covered by this hunk.
# NOTE(review): the +block+ parameter is never used; items are yielded to
# the block attached to the call, yet callers must still pass one argument.
def each_old(block)
@data_old[@start_old .. @end_old].each { |e| yield e }
end
private
# Note that an old diff can't have any context. Therefore, we know that
# there's only one block in the hunk.
def old_diff
warn "Expecting only one block in an old diff hunk!" if @blocks.size > 1
# Maps the block operation to the old-style diff command letter.
op_act = { "+" => 'a', "-" => 'd', "!" => "c" }
block = @blocks[0]
# Calculate item number range. Old diff range is just like a context
# diff range, except the ranges are on one line with the action between
# them.
s = "#{context_range(:old)}#{op_act[block.op]}#{context_range(:new)}\n"
# If removing anything, just print out all the remove lines in the hunk
# which is just all the remove lines in the block.
@data_old[@start_old .. @end_old].each { |e| s << "< #{e}\n" } unless block.remove.empty?
s << "---\n" if block.op == "!"
@data_new[@start_new .. @end_new].each { |e| s << "> #{e}\n" } unless block.insert.empty?
s
end
# Renders the hunk as a unified diff: an "@@ -old +new @@" header followed
# by the old items prefixed with ' ', with removed items re-marked and
# inserted items spliced in.
def unified_diff
# Calculate item number range.
s = "@@ -#{unified_range(:old)} +#{unified_range(:new)} @@\n"
# Outlist starts containing the hunk of the old file. Removing an item
# just means putting a '-' in front of it. Inserting an item requires
# getting it from the new file and splicing it in. We splice in
# +num_added+ items. Remove blocks use +num_added+ because splicing
# changed the length of outlist.
#
# We remove +num_removed+ items. Insert blocks use +num_removed+
# because their item numbers -- corresponding to positions in the NEW
# file -- don't take removed items into account.
lo, hi, num_added, num_removed = @start_old, @end_old, 0, 0
outlist = @data_old[lo .. hi].collect { |e| e.gsub(/^/, ' ') }
@blocks.each do |block|
block.remove.each do |item|
op = item.action.to_s # -
offset = item.position - lo + num_added
outlist[offset].gsub!(/^ /, op.to_s)
num_removed += 1
end
block.insert.each do |item|
op = item.action.to_s # +
offset = item.position - @start_new + num_removed
outlist[offset, 0] = "#{op}#{@data_new[item.position]}"
num_added += 1
end
end
s << outlist.join("\n")
end
# Renders the hunk as a context diff: a "***" section listing the old
# items and a "---" section listing the new items, with the items touched
# by a block marked with that block's operation character.
def context_diff
s = "***************\n"
s << "*** #{context_range(:old)} ****\n"
r = context_range(:new)
# Print out file 1 part for each block in context diff format if there
# are any blocks that remove items
lo, hi = @start_old, @end_old
removes = @blocks.select { |e| not e.remove.empty? }
# NOTE(review): +removes+ is an Array returned by select and is therefore
# always truthy, so this branch runs even when no block removes anything.
if removes
outlist = @data_old[lo .. hi].collect { |e| e.gsub(/^/, ' ') }
removes.each do |block|
block.remove.each do |item|
outlist[item.position - lo].gsub!(/^ /) { block.op } # - or !
end
end
s << outlist.join("\n")
end
s << "\n--- #{r} ----\n"
lo, hi = @start_new, @end_new
inserts = @blocks.select { |e| not e.insert.empty? }
# NOTE(review): as above, +inserts+ is always truthy.
if inserts
outlist = @data_new[lo .. hi].collect { |e| e.gsub(/^/, ' ') }
inserts.each do |block|
block.insert.each do |item|
outlist[item.position - lo].gsub!(/^ /) { block.op } # + or !
end
end
s << outlist.join("\n")
end
s
end
# Renders the hunk as an ed command. For :reverse_ed the command letter
# precedes the old range; for any other format the old range comes first,
# with the comma replaced by a space. When the first block inserts
# anything, the new items follow, terminated by a line holding a single
# ".".
def ed_diff(format)
op_act = { "+" => 'a', "-" => 'd', "!" => "c" }
warn "Expecting only one block in an old diff hunk!" if @blocks.size > 1
if format == :reverse_ed
s = "#{op_act[@blocks[0].op]}#{context_range(:old)}\n"
else
s = "#{context_range(:old).gsub(/,/, ' ')}#{op_act[@blocks[0].op]}\n"
end
unless @blocks[0].insert.empty?
@data_new[@start_new .. @end_new].each { |e| s << "#{e}\n" }
s << ".\n"
end
s
end
# Generate a range of item numbers to print. Only print 1 number if the
# range has only one item in it. Otherwise, it's 'start,end'
# Positions are shifted by one to give one-based item numbers.
def context_range(mode)
case mode
when :old
s, e = (@start_old + 1), (@end_old + 1)
when :new
s, e = (@start_new + 1), (@end_new + 1)
end
(s < e) ? "#{s},#{e}" : "#{e}"
end
# Generate a range of item numbers to print for unified diff. Print
# number where block starts, followed by number of lines in the block
# (don't print number of lines if it's 1)
def unified_range(mode)
case mode
when :old
s, e = (@start_old + 1), (@end_old + 1)
when :new
s, e = (@start_new + 1), (@end_new + 1)
end
length = e - s + 1
first = (length < 2) ? e : s # "strange, but correct"
(length == 1) ? "#{first}" : "#{first},#{length}"
end
end

View file

@ -0,0 +1,226 @@
#!/usr/bin/env ruby
require 'optparse'
require 'ostruct'
require 'diff/lcs/hunk'
# == ldiff Usage
# ldiff [options] oldfile newfile
#
# -c:: Displays a context diff with 3 lines of context.
# -C [LINES], --context [LINES]:: Displays a context diff with LINES lines of context. Default 3 lines.
# -u:: Displays a unified diff with 3 lines of context.
# -U [LINES], --unified [LINES]:: Displays a unified diff with LINES lines of context. Default 3 lines.
# -e:: Creates an 'ed' script to change oldfile to newfile.
# -f:: Creates an 'ed' script to change oldfile to newfile in reverse order.
# -a, --text:: Treats the files as text and compares them line-by-line, even if they do not seem to be text.
# --binary:: Treats the files as binary.
# -q, --brief:: Reports only whether or not the files differ, not the details.
# --help:: Shows the command-line help.
# --version:: Shows the version of Diff::LCS.
#
# By default, produces an "old-style" diff, with output like UNIX diff.
#
# == Copyright
# Copyright &copy; 2004 Austin Ziegler
#
# Part of Diff::LCS <http://rubyforge.org/projects/ruwiki/>
# Austin Ziegler <diff-lcs@halostatue.ca>
#
# This program is free software. It may be redistributed and/or modified under
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
# Ruby licence.
module Diff::LCS::Ldiff
  # Text written for --version.
  BANNER = <<-COPYRIGHT
ldiff #{Diff::LCS::VERSION}
Copyright © 2004 Austin Ziegler
Part of Diff::LCS.
http://rubyforge.org/projects/ruwiki/
Austin Ziegler <diff-lcs@halostatue.ca>
This program is free software. It may be redistributed and/or modified under
the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
Ruby licence.
$Id: ldiff.rb,v 1.1 2004/09/26 01:37:49 austin Exp $
  COPYRIGHT

  class << self
    attr_reader :format, :lines #:nodoc:
    attr_reader :file_old, :file_new #:nodoc:
    attr_reader :data_old, :data_new #:nodoc:

    # Parses the command line in +args+, compares the two named files and
    # writes the result to +output+.
    #
    # args:: the argument list; it must respond to #options (as ARGV does
    #        once 'optparse' is loaded) and is consumed by option parsing.
    # input:: unused.
    # output:: sink for the diff text; only #<< is called on it.
    # error:: sink for usage, --help and --version text.
    #
    # Returns 0 when the files do not differ (or after --help/--version),
    # 1 when they differ, and 127 when exactly two file names were not
    # given.
    #
    # The chosen format, context size and binary flag are kept in instance
    # variables of this module and therefore persist between calls.
    def run(args, input = $stdin, output = $stdout, error = $stderr) #:nodoc:
      args.options do |o|
        o.banner = "Usage: #{File.basename($0)} [options] oldfile newfile"
        o.separator ""
        o.on('-c',
             'Displays a context diff with 3 lines of',
             'context.') do |ctx|
          @format = :context
          @lines = 3
        end
        o.on('-C', '--context [LINES]', Numeric,
             'Displays a context diff with LINES lines',
             'of context. Default 3 lines.') do |ctx|
          @format = :context
          @lines = ctx || 3
        end
        o.on('-u',
             'Displays a unified diff with 3 lines of',
             'context.') do |ctx|
          @format = :unified
          @lines = 3
        end
        o.on('-U', '--unified [LINES]', Numeric,
             'Displays a unified diff with LINES lines',
             'of context. Default 3 lines.') do |ctx|
          @format = :unified
          @lines = ctx || 3
        end
        o.on('-e',
             'Creates an \'ed\' script to change',
             'oldfile to newfile.') do |ctx|
          @format = :ed
        end
        o.on('-f',
             'Creates an \'ed\' script to change',
             'oldfile to newfile in reverse order.') do |ctx|
          @format = :reverse_ed
        end
        o.on('-a', '--text',
             'Treat the files as text and compare them',
             'line-by-line, even if they do not seem',
             'to be text.') do |txt|
          @binary = false
        end
        o.on('--binary',
             'Treats the files as binary.') do |bin|
          @binary = true
        end
        o.on('-q', '--brief',
             'Report only whether or not the files',
             'differ, not the details.') do |ctx|
          @format = :report
        end
        o.on_tail('--help', 'Shows this text.') do
          error << o
          return 0
        end
        o.on_tail('--version', 'Shows the version of Diff::LCS.') do
          error << BANNER
          return 0
        end
        o.on_tail ""
        o.on_tail 'By default, runs produces an "old-style" diff, with output like UNIX diff.'
        o.parse!
      end

      unless args.size == 2
        error << args.options
        return 127
      end

      # Defaults are for old-style diff
      @format ||= :old
      @lines ||= 0

      # Take the file names from the parsed argument list rather than from
      # the global ARGV, so a caller-supplied +args+ is honoured.
      file_old, file_new = *args

      case @format
      when :context
        char_old = '*' * 3
        char_new = '-' * 3
      when :unified
        char_old = '-' * 3
        char_new = '+' * 3
      end

      # After we've read up to a certain point in each file, the number of
      # items we've read from each file will differ by FLD (could be 0).
      file_length_difference = 0

      if @binary.nil? || @binary
        data_old = IO.read(file_old)
        data_new = IO.read(file_new)

        # Test binary status: a NUL byte within the first 4096 characters of
        # EITHER file marks the comparison as binary. (The former
        # "@binary = (not a) or (not b)" only ever assigned the first
        # operand, and String#grep is unavailable on Ruby 1.9 and later.)
        if @binary.nil?
          @binary = data_old[0...4096].include?("\0") ||
                    data_new[0...4096].include?("\0")
        end

        unless @binary
          data_old = data_old.split(/\n/).map! { |e| e.chomp }
          data_new = data_new.split(/\n/).map! { |e| e.chomp }
        end
      else
        data_old = IO.readlines(file_old).map! { |e| e.chomp }
        data_new = IO.readlines(file_new).map! { |e| e.chomp }
      end

      # diff yields lots of pieces, each of which is basically a Block object
      if @binary
        # For binary data we only know WHETHER the contents differ.
        # (This used to test for equality, which inverted the result.)
        diffs = (data_old != data_new)
      else
        diffs = Diff::LCS.diff(data_old, data_new)
        diffs = nil if diffs.empty?
      end

      return 0 unless diffs

      if @format == :report
        output << "Files #{file_old} and #{file_new} differ\n"
        return 1
      end

      # There are no hunks to render for binary data; report the difference
      # instead of trying to iterate over a boolean below.
      if @binary
        output << "Binary files #{file_old} and #{file_new} differ\n"
        return 1
      end

      if (@format == :unified) || (@format == :context)
        # Write the file headers to the same sink as the rest of the diff.
        ft = File.stat(file_old).mtime.localtime.strftime('%Y-%m-%d %H:%M:%S %z')
        output << "#{char_old} #{file_old}\t#{ft}\n"
        ft = File.stat(file_new).mtime.localtime.strftime('%Y-%m-%d %H:%M:%S %z')
        output << "#{char_new} #{file_new}\t#{ft}\n"
      end

      # Loop over hunks. If a hunk overlaps with the last hunk, join them.
      # Otherwise, print out the old one.
      oldhunk = hunk = nil

      if @format == :ed
        # ed scripts are emitted last hunk first, so collect them in an
        # array and replay it in reverse at the end.
        real_output = output
        output = []
      end

      diffs.each do |piece|
        begin
          hunk = Diff::LCS::Hunk.new(data_old, data_new, piece, @lines,
                                     file_length_difference)
          file_length_difference = hunk.file_length_difference

          next unless oldhunk

          if (@lines > 0) && hunk.overlaps?(oldhunk)
            hunk.unshift(oldhunk)
          else
            output << oldhunk.diff(@format)
          end
        ensure
          # Runs for every piece, including the first one skipped by +next+.
          oldhunk = hunk
          output << "\n"
        end
      end

      output << oldhunk.diff(@format)
      output << "\n"

      if @format == :ed
        # NOTE(review): the "\n" separators appended above are also elements
        # of this array and receive #diff(:ed_finish) here -- verify that
        # this is intended.
        output.reverse_each { |e| real_output << e.diff(:ed_finish) }
      end

      return 1
    end
  end
end

View file

@ -0,0 +1,19 @@
#!/usr/bin/env ruby
#--
# Copyright 2004 Austin Ziegler <diff-lcs@halostatue.ca>
# adapted from:
# Algorithm::Diff (Perl) by Ned Konz <perl@bike-nomad.com>
# Smalltalk by Mario I. Wolczko <mario@wolczko.com>
# implements McIlroy-Hunt diff algorithm
#
# This program is free software. It may be redistributed and/or modified under
# the terms of the GPL version 2 (or later), the Perl Artistic licence, or the
# Ruby licence.
#
# $Id: string.rb,v 1.3 2004/08/08 20:33:09 austin Exp $
#++
# Includes Diff::LCS into String.
# Mixes the Diff::LCS module into the built-in String class.
String.class_eval { include Diff::LCS }

133
vendor/plugins/maruku/lib/maruku.rb vendored Normal file
View file

@ -0,0 +1,133 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'rexml/document'
# :include:MaRuKu.txt
module MaRuKu
  # Namespaces for the input side. They are declared empty here so that the
  # classes below can mix them in.
  module In
    module Markdown
      module SpanLevelParser
      end

      module BlockLevelParser
      end
    end
    # more to come?
  end

  # Namespaces for the output side, likewise declared empty here.
  module Out
    # Functions for exporting to MarkDown.
    module Markdown
    end

    # Functions for exporting to HTML.
    module HTML
    end

    # Functions for exporting to Latex
    module Latex
    end
  end

  # These are strings utilities.
  module Strings
  end

  module Helpers
  end

  module Errors
  end

  # Base element class. It mixes in REXML, the MaRuKu namespace itself, the
  # three output namespaces and the utility namespaces, in this order.
  class MDElement
    [
      REXML,
      MaRuKu,
      Out::Markdown,
      Out::HTML,
      Out::Latex,
      Strings,
      Helpers,
      Errors
    ].each { |mixin| include mixin }
  end

  # Document class: an MDElement that additionally mixes in the Markdown
  # input namespace and its span-level and block-level parser namespaces.
  class MDDocument < MDElement
    [
      In::Markdown,
      In::Markdown::SpanLevelParser,
      In::Markdown::BlockLevelParser
    ].each { |mixin| include mixin }
  end
end

# This is the public interface
class Maruku < MaRuKu::MDDocument
end
require 'rexml/document'
# Structures definition
require 'maruku/structures'
require 'maruku/structures_inspect'
require 'maruku/defaults'
# Less typing
require 'maruku/helpers'
# Code for parsing whole Markdown documents
require 'maruku/input/parse_doc'
# Ugly things kept in a closet
require 'maruku/string_utils'
require 'maruku/input/linesource'
require 'maruku/input/type_detection'
# A class for reading and sanitizing inline HTML
require 'maruku/input/html_helper'
# Code for parsing Markdown block-level elements
require 'maruku/input/parse_block'
# Code for parsing Markdown span-level elements
require 'maruku/input/charsource'
require 'maruku/input/parse_span_better'
require 'maruku/input/rubypants'
require 'maruku/input/extensions'
require 'maruku/attributes'
require 'maruku/structures_iterators'
require 'maruku/errors_management'
# Code for creating a table of contents
require 'maruku/toc'
# Version and URL
require 'maruku/version'
# Exporting to html
require 'maruku/output/to_html'
# Exporting to latex
require 'maruku/output/to_latex'
require 'maruku/output/to_latex_strings'
require 'maruku/output/to_latex_entities'
# Pretty print
require 'maruku/output/to_markdown'
# Exporting to text: strips all formatting (not complete)
require 'maruku/output/to_s'
# class Maruku is the global interface
require 'maruku/maruku'

View file

@ -0,0 +1,462 @@
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
# NOTE: this is the old span-level regexp-based parser.
#
# The new parser is a real parser and is defined with functions in parse_span_better.rb
# The new parser is faster, handles syntax errors, but it's absolutely not readable.
#
# Also, regexp parsers simply CANNOT handle inline HTML properly.
# There are two black-magic methods `match_couple_of` and `map_match`,
# defined at the end of the file, that make the function
# `parse_lines_as_span` so elegant.
class Maruku
# Takes care of all span-level formatting, links, images, etc.
#
# Lines must not contain block-level elements.
#
# The lines are joined by resolve_linebreaks and wrapped in a temporary
# :dummy MDElement; every step below then replaces parts of the remaining
# plain strings with elements. Returns the children of that temporary
# element.
def parse_lines_as_span(lines)
  # first, get rid of linebreaks
  res = resolve_linebreaks(lines)

  span = MDElement.new(:dummy, res)

  # encode all escapes
  span.replace_each_string { |s| s.escape_md_special }

  # The order of processing is significant:
  # 1. inline code
  # 2. immediate links
  # 3. inline HTML
  # 4. everything else

  # search for ``code`` markers
  span.match_couple_of('``') { |children, match1, match2|
    e = create_md_element(:inline_code)
    # this is now opaque to processing
    e.meta[:raw_code] = children.join('').it_was_a_code_block
    e
  }

  # Search for `single tick` code markers
  span.match_couple_of('`') { |children, match1, match2|
    e = create_md_element(:inline_code)
    # this is now opaque to processing
    e.meta[:raw_code] = children.join('').it_was_a_code_block
    e
  }

  # Detect any immediate link: <http://www.google.com>
  # we expect an http: or something: at the beginning
  span.map_match( /<(\w+:[^\>]+)>/) { |match|
    url = match[1]

    e = create_md_element(:immediate_link, [])
    e.meta[:url] = url
    e
  }

  # Search for inline HTML (the support is pretty basic for now)

  # this searches for a matching block
  inlineHTML1 = %r{
    (       # put everything in 1
    <       # open
    (\w+)   # opening tag in 2
    >       # close
    .*      # anything
    </\2>   # match closing tag
    )
  }x

  # this searches for only one block
  inlineHTML2 = %r{
    (       # put everything in 1
    <       # open
    \w+     #
            # close
    [^<>]*  # anything except
    />      # closing tag
    )
  }x

  [inlineHTML1, inlineHTML2].each do |reg|
    span.map_match(reg) { |match|
      raw_html = match[1]
      convert_raw_html_in_list(raw_html)
    }
  end

  # Detect footnotes references: [^1]
  span.map_match(/\[(\^[^\]]+)\]/) { |match|
    id = match[1].strip.downcase
    e = create_md_element(:footnote_reference)
    e.meta[:footnote_id] = id
    e
  }

  # Detect any image like ![Alt text][url]
  span.map_match(/\!\[([^\]]+)\]\s?\[([^\]]*)\]/) { |match|
    alt = match[1]
    id = match[2].strip.downcase

    # An empty id ("![Alt text][]") means: use the alt text as the id.
    # (The original referenced an undefined local `text` here.)
    if id.size == 0
      id = alt.strip.downcase
    end

    e = create_md_element(:image)
    e.meta[:ref_id] = id
    e
  }

  # Detect any image with immediate url: ![Alt](url "title")
  # a dummy ref is created and put in the symbol table
  link1 = /!\[([^\]]+)\]\s?\(([^\s\)]*)(?:\s+["'](.*)["'])?\)/
  span.map_match(link1) { |match|
    alt = match[1]
    url = match[2]
    title = match[3]

    url = url.strip
    # create a dummy id
    id="dummy_#{@refs.size}"
    @refs[id] = {:url=>url, :title=>title}

    e = create_md_element(:image)
    e.meta[:ref_id] = id
    e
  }

  # an id reference: "[id]", "[ id ]"
  reg_id_ref = %r{
    \[        # opening bracket
    ([^\]]*)  # 0 or more non-closing bracket (this is too permissive)
    \]        # closing bracket
  }x

  # Matches a url: any run of characters that are not whitespace, "]" or ")".
  # Only $1 is set, to the url.
  reg_url = %r{([^\s\]\)]+)}

  # [bah](http://www.google.com "Google.com"),
  # [bah](http://www.google.com),
  # [empty]()
  reg_url_and_title = %r{
    \(                    # opening
    \s*                   # whitespace
    #{reg_url}?           # url = 1 might be empty
    (?:\s+["'](.*)["'])?  # optional title = 2
    \s*                   # whitespace
    \)                    # closing
  }x

  # Detect a link like [text][id]
  span.map_match(/\[([^\]]+)\]\s?\[([^\]]*)\]/) { |match|
    text = match[1]
    id = match[2].strip.downcase

    if id.size == 0
      id = text.strip.downcase
    end

    children = parse_lines_as_span(text)
    e = create_md_element(:link, children)
    e.meta[:ref_id] = id
    e
  }

  # Turn any remaining ![text](url "title") into a link.
  # a dummy ref is created and put in the symbol table
  # NOTE(review): this reuses the image pattern (leading "!"), which was
  # already consumed by the image step above -- confirm whether the "!"
  # is intended here.
  link1 = /!\[([^\]]+)\]\s?\(([^\s\)]*)(?:\s+["'](.*)["'])?\)/
  span.map_match(link1) { |match|
    text = match[1]
    children = parse_lines_as_span(text)

    url = match[2]
    title = match[3]

    url = url.strip
    # create a dummy id
    id="dummy_#{@refs.size}"
    @refs[id] = {:url=>url, :title=>title}
    @refs[id][:title] = title if title

    e = create_md_element(:link, children)
    e.meta[:ref_id] = id
    e
  }

  # Detect any link like [Google engine][google]
  span.match_couple_of('[',  # opening bracket
    %r{\]              # closing bracket
    [ ]?               # optional whitespace
    #{reg_id_ref}      # ref id, with $1 being the reference
    }x
  ) { |children, match1, match2|
    id = match2[1]
    id = id.strip.downcase

    if id.size == 0
      id = children.join.strip.downcase
    end

    e = create_md_element(:link, children)
    e.meta[:ref_id] = id
    e
  }

  # Detect any link with immediate url: [Google](http://www.google.com)
  # XXX Note that the url can be empty: [Empty]()
  # a dummy ref is created and put in the symbol table
  span.match_couple_of('[',  # opening bracket
    %r{\]                  # closing bracket
    [ ]?                   # optional whitespace
    #{reg_url_and_title}   # ref id, with $1 being the url and $2 being the title
    }x
  ) { |children, match1, match2|
    url = match2[1]
    # The pattern has exactly two capture groups (url, title); the original
    # read group 3, which is always nil, so titles were silently dropped.
    title = match2[2]
    # create a dummy id
    id="dummy_#{@refs.size}"
    @refs[id] = {:url=>url}
    @refs[id][:title] = title if title

    e = create_md_element(:link, children)
    e.meta[:ref_id] = id
    e
  }

  # Detect an email address <andrea@invalid.it>
  span.map_match(EMailAddress) { |match|
    email = match[1]
    e = create_md_element(:email_address, [])
    e.meta[:email] = email
    e
  }

  # Detect HTML entities
  span.map_match(/&([\w\d]+);/) { |match|
    entity_name = match[1]

    e = create_md_element(:entity, [])
    e.meta[:entity_name] = entity_name
    e
  }

  # And now the easy stuff

  # search for ***strong and em***
  span.match_couple_of('***') { |children,m1,m2|
    create_md_element(:strong, [create_md_element(:emphasis, children)] ) }

  span.match_couple_of('___') { |children,m1,m2|
    create_md_element(:strong, [create_md_element(:emphasis, children)] ) }

  # search for **strong**
  span.match_couple_of('**') { |children,m1,m2| create_md_element(:strong, children) }

  # search for __strong__
  span.match_couple_of('__') { |children,m1,m2| create_md_element(:strong, children) }

  # search for *emphasis*
  span.match_couple_of('*') { |children,m1,m2| create_md_element(:emphasis, children) }

  # search for _emphasis_
  span.match_couple_of('_') { |children,m1,m2| create_md_element(:emphasis, children) }

  # finally, unescape the special characters
  span.replace_each_string { |s| s.unescape_md_special}

  span.children
end
# Joins the stripped lines with single spaces. Whenever force_linebreak?
# is true for a line, the text accumulated so far is emitted, followed by a
# :linebreak element.
#
# Returns an array containing Strings and :linebreak elements.
def resolve_linebreaks(lines)
  pieces = []
  buffer = ""
  lines.each do |line|
    buffer = buffer.empty? ? line.strip : "#{buffer} #{line.strip}"
    next unless force_linebreak?(line)

    pieces.push(buffer, create_md_element(:linebreak))
    buffer = ""
  end
  pieces << buffer unless buffer.empty?
  pieces
end
# Wraps a fragment of inline HTML, e.g.
#   <em> A</em> dopwkk *maruk* <em>A</em>
# into a :raw_html element. The source text is stored in meta[:raw_html];
# meta[:parsed_html] is set only when the fragment parses, otherwise a
# warning goes to $stderr and the key is left unset.
def convert_raw_html_in_list(raw_html)
  node = create_md_element(:raw_html)
  node.meta[:raw_html] = raw_html
  begin
    parsed = Document.new(raw_html)
    node.meta[:parsed_html] = parsed
  rescue
    $stderr.puts "convert_raw_html_in_list Malformed HTML:\n#{raw_html}"
  end
  node
end
end
# And now the black magic that makes the part above so elegant
class MDElement
# Applies +regexp+ to every string handed over by `replace_each_string`.
# Each occurrence is cut out and replaced in place:
#
#   ..., matched_string, ...  ->  ..., pre_match, block.call(match), post_match
#
# Scanning then continues on the post_match. The block may return a single
# value or an array of values; empty pre/post strings are dropped.
def map_match(regexp, &block)
  replace_each_string do |rest|
    pieces = []
    while (found = regexp.match(rest))
      before = found.pre_match
      pieces << before if before && before.size > 0
      # splice in whatever the block produced for this match
      pieces.concat([*block.call(found)])
      # go on with the rest of the string
      rest = found.post_match
    end
    pieces << rest if rest.size > 0
    pieces
  end
end
# Finds couple of delimiters in a hierarchy of Strings and MDElements
#
# Open and close are two delimiters (like '[' and ']'), or two Regexp.
#
# If you don't pass close, it defaults to open.
#
# Each block is called with |contained children, match1, match2|
#
# The block's return value replaces the opening delimiter, the contained
# children and the closing delimiter. @children is consumed as a queue and
# rebuilt; adjacent strings are merged again at the end.
def match_couple_of(open, close=nil, &block)
close = close || open
# Plain strings are matched literally; Regexps are used as given.
open_regexp = open.kind_of?(Regexp) ? open : Regexp.new(Regexp.escape(open))
close_regexp = close.kind_of?(Regexp) ? close : Regexp.new(Regexp.escape(close))
# Do the same to children first
for c in @children; if c.kind_of? MDElement
c.match_couple_of(open_regexp, close_regexp, &block)
end end
processed_children = []
# @children is used as a work queue: items are shifted off the front and
# unprocessed remainders are pushed back with unshift.
until @children.empty?
c = @children.shift
if c.kind_of? String
match1 = open_regexp.match(c)
if not match1
processed_children << c
else # we found opening, now search closing
# puts "Found opening (#{marker}) in #{c.inspect}"
# pre match is processed
processed_children.push match1.pre_match if
match1.pre_match && match1.pre_match.size > 0
# we will process again the post_match
@children.unshift match1.post_match if
match1.post_match && match1.post_match.size>0
# Collect everything up to the closing delimiter into `contained`.
contained = []; found_closing = false
until @children.empty? || found_closing
c = @children.shift
if c.kind_of? String
match2 = close_regexp.match(c)
if not match2
contained << c
else
# we found closing
found_closing = true
# pre match is contained
contained.push match2.pre_match if
match2.pre_match && match2.pre_match.size>0
# we will process again the post_match
@children.unshift match2.post_match if
match2.post_match && match2.post_match.size>0
# And now we call the block
substitute = block.call(contained, match1, match2)
processed_children << substitute
# puts "Found closing (#{marker}) in #{c.inspect}"
# puts "Children: #{contained.inspect}"
# puts "Substitute: #{substitute.inspect}"
end
else
# non-String children between the delimiters are kept as they are
contained << c
end
end
if not found_closing
# $stderr.puts "##### Could not find closing for #{open}, #{close} -- ignoring"
# No closing delimiter: keep the opening delimiter as literal text and
# put the collected items back on the queue, in their original order.
processed_children << match1.to_s
contained.reverse.each do |c|
@children.unshift c
end
end
end
else
processed_children << c
end
end
# The queue must be fully drained at this point.
raise "BugBug" unless @children.empty?
rebuilt = []
# rebuild strings
processed_children.each do |c|
if c.kind_of?(String) && rebuilt.last && rebuilt.last.kind_of?(String)
rebuilt.last << c
else
rebuilt << c
end
end
@children = rebuilt
end
end

View file

@ -0,0 +1,226 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
class String
  # Returns the string as is, unless it contains whitespace or a quote
  # character (' or "); in that case returns its `inspect` form.
  def quote_if_needed
    self =~ /[\s\'\"]/ ? inspect : self
  end
end
module MaRuKu
  MagicChar = ':'

  # An ordered list of attributes, stored as [key, value] pairs.
  #
  # An attribute list such as
  #   {#id .cl key="val" ref}
  # becomes
  #   [ [:id, 'id'], [:class, 'cl'], ['key', 'val'], [:ref, 'ref'] ]
  class AttributeList < Array

    # Entries must be added through the push_* methods below.
    private :push

    # Appends a key="val" pair. Raises a RuntimeError unless both are given.
    def push_key_val(key, val)
      # The original guard, `if not key and val`, parses as `(not key) and val`
      # and therefore let a missing value through.
      raise "Bad #{key.inspect}=#{val.inspect}" unless key && val
      push [key, val]
    end

    # Appends a reference to another attribute list; the id is copied.
    def push_ref(ref_id)
      raise "Bad :ref #{ref_id.inspect}" if not ref_id
      push [:ref, ref_id+""]
    end

    # Appends a class (".cl").
    def push_class(val)
      raise "Bad :class #{val.inspect}" if not val
      push [:class, val]
    end

    # Appends an id ("#id").
    def push_id(val)
      raise "Bad :id #{val.inspect}" if not val
      push [:id, val]
    end

    # Renders the list back in attribute-list syntax (without the braces),
    # with the entries separated by single spaces.
    def to_s
      map do |k,v|
        case k
        when :id;    "#" + v.quote_if_needed
        when :class; "." + v.quote_if_needed
        when :ref;   v.quote_if_needed
        else k.quote_if_needed + "=" + v.quote_if_needed
        end
      end.join(' ')
    end
    alias to_md to_s
  end
end
module MaRuKu; module In; module Markdown; module SpanLevelParser
# Builds the test table for attribute-list parsing.
#
# Each row below is [input, expected, comment]. Rows that leave out
# `expected` and/or `comment` reuse the value of the previous row (kept in
# @expected / @comment); a reused comment gets a running number appended.
#
# Returns an array of [source, expected, description] triples where source
# is the input wrapped as "{:input}", an Array `expected` has been replaced
# by [md_ial(expected)] (other values such as :throw are passed through),
# and description is the comment prefixed with "Attributes: ".
def unit_tests_for_attribute_lists
[
[ "", [], "Empty lists are allowed" ],
[ "=", :throw, "Bad char to begin a list with." ],
[ "a =b", :throw, "No whitespace before `=`." ],
[ "a= b", :throw, "No whitespace after `=`." ],
[ "a b", [[:ref, 'a'],[:ref, 'b']], "More than one ref" ],
[ "a b c", [[:ref, 'a'],[:ref, 'b'],[:ref, 'c']], "More than one ref" ],
[ "hello notfound", [[:ref, 'hello'],[:ref, 'notfound']]],
[ "'a'", [[:ref, 'a']], "Quoted value." ],
[ '"a"' ],
[ "a=b", [['a','b']], "Simple key/val" ],
[ "'a'=b" ],
[ "'a'='b'" ],
[ "a='b'" ],
[ 'a="b\'"', [['a',"b\'"]], "Key/val with quotes" ],
[ 'a=b\''],
[ 'a="\\\'b\'"', [['a',"\'b\'"]], "Key/val with quotes" ],
['"', :throw, "Unclosed quotes"],
["'"],
["'a "],
['"a '],
[ "#a", [[:id, 'a']], "Simple ID" ],
[ "#'a'" ],
[ '#"a"' ],
[ "#", :throw, "Unfinished '#'." ],
[ ".", :throw, "Unfinished '.'." ],
[ "# a", :throw, "No white-space after '#'." ],
[ ". a", :throw, "No white-space after '.' ." ],
[ "a=b c=d", [['a','b'],['c','d']], "Tabbing" ],
[ " \ta=b \tc='d' "],
[ "\t a=b\t c='d'\t\t"],
[ ".\"a'", :throw, "Mixing quotes is bad." ],
].map { |s, expected, comment|
# inherit the expected value of the previous row when none is given
@expected = (expected ||= @expected)
# inherit the previous comment when none is given; `last` is only set
# (to that previous comment) in this inherited case
@comment = (comment ||= (last=@comment) )
# inherited comment: append the next counter value; otherwise restart at 1
(comment == last && (comment += (@count+=1).to_s)) || @count = 1
expected = [md_ial(expected)] if expected.kind_of? Array
["{#{MagicChar}#{s}}", expected, "Attributes: #{comment}"]
}
end
# Builds an AttributeList from the array +s+ (empty when omitted).
def md_al(s=[])
  AttributeList.new(s)
end
# Reads an attribute list from +src+, stopping at the first character that
# is included in +break_on_chars+ (the character itself is not consumed) or
# at end of input.
#
# Problems are reported with maruku_error / tell_user and reading then
# continues where possible. All error reports carry +src+ and +con+ so the
# position can be described (two calls originally omitted them).
#
# Returns the AttributeList read so far.
def read_attribute_list(src, con, break_on_chars)

  separators = break_on_chars + [?=,?\ ,?\t]
  escaped = Maruku::EscapedCharInQuotes

  al = AttributeList.new
  while true
    src.consume_whitespace
    break if break_on_chars.include? src.cur_char

    case src.cur_char
    when nil
      maruku_error "Attribute list terminated by EOF:\n "+
        "#{al.inspect}" , src, con
      tell_user "I try to continue and return partial attribute list:\n"+
        al.inspect
      break
    when ?=     # error
      maruku_error "In attribute lists, cannot start identifier with `=`.",
        src, con
      tell_user "I try to continue"
      src.ignore_char
    when ?#     # id definition
      src.ignore_char
      if id = read_quoted_or_unquoted(src, con, escaped, separators)
        al.push_id id
      else
        maruku_error 'Could not read `id` attribute.', src, con
        tell_user 'Trying to ignore bad `id` attribute.'
      end
    when ?.     # class definition
      src.ignore_char
      if klass = read_quoted_or_unquoted(src, con, escaped, separators)
        al.push_class klass
      else
        maruku_error 'Could not read `class` attribute.', src, con
        tell_user 'Trying to ignore bad `class` attribute.'
      end
    else
      # either key=value or a bare reference
      if key = read_quoted_or_unquoted(src, con, escaped, separators)
        if src.cur_char == ?=
          src.ignore_char # skip the =
          if val = read_quoted_or_unquoted(src, con, escaped, separators)
            al.push_key_val(key, val)
          else
            maruku_error "Could not read value for key #{key.inspect}.",
              src, con
            tell_user "Ignoring key #{key.inspect}."
          end
        else
          al.push_ref key
        end
      else
        # NOTE(review): nothing is consumed on this path; whether the loop
        # can make progress depends on read_quoted_or_unquoted -- confirm.
        maruku_error 'Could not read key or reference.', src, con
      end
    end # case
  end # while true
  al
end
# Attaches every :ial element found in +elements+ (except one at index 0)
# to a neighbouring MDElement: the element before it if that is an
# MDElement, otherwise the element after it. If neither neighbour is an
# MDElement the problem is reported through maruku_error.
#
# Unless Globals[:debug_keep_ials] is set, the :ial elements are then
# removed from +elements+ in place (one in first position is kept).
def merge_ial(elements, src, con)

  # We need a helper
  def is_ial(e); e.kind_of?(MDElement) && e.node_type == :ial end

  # Apply each IAL to the element before
  elements.each_with_index do |item, idx|
    next unless is_ial(item) && idx >= 1

    previous = elements[idx-1]
    following = elements[idx+1]
    if previous.kind_of? MDElement
      previous.al = item.ial
    elsif following.kind_of? MDElement
      following.al = item.ial
    else
      maruku_error "It is not clear to me what element this IAL {:#{item.ial.to_md}} \n"+
      "is referring to. The element before is a #{previous.class.to_s}, \n"+
      "the element after is a #{following.class.to_s}.\n"+
      "\n before: #{previous.inspect}"+
      "\n after: #{following.inspect}",
        src, con
      # xxx say whether there is an empty element nearby
    end
  end

  unless Globals[:debug_keep_ials]
    elements.delete_if {|x| is_ial(x) unless x == elements.first}
  end
end
end end end end
#module MaRuKu; module In; module Markdown; module SpanLevelParser

View file

@ -0,0 +1,65 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu

  # Library-wide default settings; get_setting falls back to these.
  Globals = {
    :unsafe_features       => false,
    :on_error              => :warning,

    :use_numbered_headers  => false,

    :maruku_signature      => false,
    :code_background_color => '#fef',
    :code_show_spaces      => false,
    :html_math_engine      => 'itex2mml', #ritex, itex2mml, none
    :html_png_engine       => 'none',
    :html_png_dir          => 'pngs',
    :html_png_url          => 'pngs/',
    :html_png_resolution   => 200,

    :html_use_syntax       => false,

    :latex_use_listings    => false,
    :latex_cjk             => false,

    :debug_keep_ials       => false,
  }

  class MDElement
    # Looks up the setting +sym+: first in this element's attributes, then
    # in the attributes of its document (if any), then in MaRuKu::Globals.
    # When it is found nowhere, a message is written to $stderr and nil is
    # returned.
    def get_setting(sym)
      return self.attributes[sym] if self.attributes.has_key?(sym)
      return self.doc.attributes[sym] if self.doc && self.doc.attributes.has_key?(sym)
      return MaRuKu::Globals[sym] if MaRuKu::Globals.has_key?(sym)

      $stderr.puts "Bug: no default for #{sym.inspect}"
      nil
    end
  end
end

View file

@ -0,0 +1,92 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
#m Any method that detects formatting error calls the
#m maruku_error() method.
#m if @meta[:on_error] ==
#m
#m - :warning write on the standard err (or @error_stream if defined),
#m then do your best.
#m - :ignore be shy and try to continue
#m - :raise raises a MarukuException
#m
#m default is :warning (see MaRuKu::Globals[:on_error])
module MaRuKu

  # Raised by raise_error (error policy :raise).
  class Exception < RuntimeError
  end

  module Errors

    # Reports the error +s+ according to get_setting(:on_error):
    # :ignore does nothing, :raise raises a MaRuKu::Exception, :warning
    # writes a framed message through tell_user. Any other policy value
    # raises a RuntimeError. +src+ and +con+, when given, are asked to
    # describe themselves in the message.
    def maruku_error(s,src=nil,con=nil)
      policy = get_setting(:on_error)
      if policy == :ignore
        nil
      elsif policy == :raise
        raise_error create_frame(describe_error(s,src,con))
      elsif policy == :warning
        tell_user create_frame(describe_error(s,src,con))
      else
        raise "BugBug: policy = #{policy.inspect}"
      end
    end

    # Always writes the framed message through tell_user, whatever the policy.
    def maruku_recover(s,src=nil,con=nil)
      tell_user create_frame(describe_error(s,src,con))
    end

    alias_method :error, :maruku_error

    # Raises a MaRuKu::Exception with message +s+.
    def raise_error(s)
      raise MaRuKu::Exception, s, caller
    end

    # Appends +s+ to attributes[:error_stream], or to $stderr when unset.
    def tell_user(s)
      (self.attributes[:error_stream] || $stderr) << s
    end

    # Draws an ASCII frame around the message +s+ and the first five
    # entries of the call stack.
    def create_frame(s)
      n = 75
      [
        "\n",
        " ", "_"*n, "\n",
        "| Maruku tells you:\n",
        "+", "-"*n, "\n",
        add_tabs(s,1,'| '), "\n",
        "+", "-"*n, "\n",
        add_tabs(caller[0, 5].join("\n"),1,'!'), "\n",
        "\\", "_"*n, "\n"
      ].join
    end

    # Returns +s+ followed by the descriptions of +src+ and +con+, for
    # whichever of the two is given.
    def describe_error(s,src,con)
      text = s
      text += "\n#{src.describe}\n" if src
      text += "\n#{con.describe}\n" if con
      text
    end

  end # Errors
end # MaRuKu

View file

@ -0,0 +1,11 @@
require 'maruku/ext/math/elements'
require 'maruku/ext/math/parsing'
require 'maruku/ext/math/to_latex'
require 'maruku/ext/math/to_html'
require 'maruku/ext/math/mathml_engines/none'
require 'maruku/ext/math/mathml_engines/ritex'
require 'maruku/ext/math/mathml_engines/itex2mml'
require 'maruku/ext/math/mathml_engines/blahtex'

View file

@ -0,0 +1,26 @@
module MaRuKu; class MDElement

  # Creates an :inline_math element holding the TeX source +math+.
  def md_inline_math(math)
    self.md_el(:inline_math, [], {:math=>math})
  end

  # Creates an :equation element holding the TeX source +math+.
  #
  # A \label{name} found inside +math+ is removed from the stored source and
  # overrides +label+. The caller's string is left untouched (the original
  # stripped the label with gsub!, which modified the argument in place and
  # failed on frozen strings).
  #
  # When the equation has a label and this element belongs to a document,
  # the equation gets the next number and is registered in the document's
  # eqid2eq table under its label.
  def md_equation(math, label=nil)
    reglabel= /\\label\{(\w+)\}/
    if math =~ reglabel
      label = $1
      math = math.gsub(reglabel,'')
    end
    # puts "Found label = #{label} math #{math.inspect} "
    num = nil
    if label && @doc #take number
      @doc.eqid2eq ||= {}
      num = @doc.eqid2eq.size + 1
    end
    e = self.md_el(:equation, [], {:math=>math, :label=>label,:num=>num})
    if label && @doc #take number
      @doc.eqid2eq[label] = e
    end
    e
  end

end end

View file

@ -0,0 +1,108 @@
require 'tempfile'
require 'fileutils'
require 'digest/md5'
require 'pstore'
module MaRuKu; module Out; module HTML

  # Location and vertical metrics of a rendered PNG, as reported by blahtex.
  PNG = Struct.new(:src,:depth,:height)

  # Renders +tex+ to a PNG by running the external `blahtex` program.
  #
  # blahtex's XML report is cached in Globals[:html_png_dir], in a file named
  # after the MD5 of the TeX source, so each formula is rendered only once.
  #
  # Returns a PNG struct, or nil on failure (reported via maruku_error).
  def convert_to_png_blahtex(kind, tex)
    begin
      FileUtils::mkdir_p MaRuKu::Globals[:html_png_dir]

      # first, we check whether this image has already been processed
      md5sum = Digest::MD5.hexdigest(tex+" params: ")
      result_file = File.join(MaRuKu::Globals[:html_png_dir], md5sum+".txt")

      # File.exists? was removed from current Ruby; File.exist? works everywhere.
      if not File.exist?(result_file)
        tmp_in = Tempfile.new('maruku_blahtex')
        begin
          tmp_in.write tex
          tmp_in.close

          resolution = get_setting(:html_png_resolution)

          options = "--png --use-preview-package --shell-dvipng 'dvipng -D #{resolution}' "
          options += ("--png-directory '%s'" % MaRuKu::Globals[:html_png_dir])

          cmd = "blahtex #{options} < #{tmp_in.path} > #{result_file}"
          $stderr.puts "$ #{cmd}"
          system cmd
        ensure
          # remove the temporary input even if something above failed
          tmp_in.close!
        end
      end

      # File.read closes the file again, also when an error occurs
      result = File.read(result_file)

      doc = Document.new(result, {:respect_whitespace =>:all})
      png = doc.root.elements[1]
      if png.name != 'png'
        maruku_error "Blahtex error: \n#{doc}"
        return nil
      end
      depth = png.elements['depth'] || (raise "No depth element in:\n #{doc}")
      height = png.elements['height'] || (raise "No height element in:\n #{doc}")
      md5 = png.elements['md5'] || (raise "No md5 element in:\n #{doc}")

      depth = depth.text.to_f
      height = height.text.to_f # XXX check != 0
      md5 = md5.text

      dir_url = MaRuKu::Globals[:html_png_url]
      return PNG.new("#{dir_url}#{md5}.png", depth, height)

    # StandardError rather than Exception: interrupts and exit requests
    # must not be swallowed here.
    rescue StandardError => e
      maruku_error "Error: #{e}"
    end
    nil
  end

  # Cache of blahtex MathML reports, keyed by TeX source.
  BlahtexCache = PStore.new("blahtex_cache.pstore")

  # Converts +tex+ to MathML by running the external `blahtex` program; the
  # raw report is cached in BlahtexCache.
  #
  # Returns the <mathml> element of the report, or nil on failure (reported
  # via maruku_error).
  def convert_to_mathml_blahtex(kind, tex)
    begin
      BlahtexCache.transaction do
        if BlahtexCache[tex].nil?
          tmp_in = Tempfile.new('maruku_blahtex')
          tmp_out = nil
          result = nil
          begin
            tmp_in.write tex
            tmp_in.close
            tmp_out = Tempfile.new('maruku_blahtex')

            options = "--mathml"
            cmd = "blahtex #{options} < #{tmp_in.path} > #{tmp_out.path}"
            $stderr.puts "$ #{cmd}"
            system cmd

            result = File.read(tmp_out.path)
          ensure
            # remove both temporary files, whatever happened
            tmp_in.close!
            tmp_out.close! if tmp_out
          end
          # NOTE(review): this echoes the raw blahtex report on stdout; it
          # looks like leftover debug output -- confirm before removing.
          puts result

          BlahtexCache[tex] = result
        end

        blahtex = BlahtexCache[tex]
        doc = Document.new(blahtex, {:respect_whitespace =>:all})
        mathml = doc.root.elements['mathml']
        if not mathml
          maruku_error "Blahtex error: \n#{doc}"
          return nil
        else
          return mathml
        end
      end

    # StandardError rather than Exception: interrupts and exit requests
    # must not be swallowed here.
    rescue StandardError => e
      maruku_error "Error: #{e}"
    end
    nil
  end

end end end

View file

@ -0,0 +1,29 @@
module MaRuKu; module Out; module HTML

  # Converts +tex+ to MathML with the itex2MML bindings, which are loaded
  # on first use. +kind+ selects the filter: :equation or :inline.
  #
  # Returns the root element of the parsed MathML, or nil on failure
  # (reported via maruku_error).
  def convert_to_mathml_itex2mml(kind, tex)
    begin
      if not $itex2mml_parser
        require 'itextomml'
        $itex2mml_parser = Itex2MML::Parser.new
      end

      itex_method = {:equation=>:block_filter,:inline=>:inline_filter}

      mathml = $itex2mml_parser.send(itex_method[kind], tex)
      doc = Document.new(mathml, {:respect_whitespace =>:all}).root
      return doc
    rescue LoadError => e
      maruku_error "Could not load package 'itex2mml'.\n"+
        "Please install it."
    rescue REXML::ParseException => e
      maruku_error "Invalid MathML TeX: \n#{add_tabs(tex,1,'tex>')}"+
        "\n\n #{e.inspect}"
    rescue => e
      # The exception must be bound here: with a bare `rescue` the message
      # below always printed `nil` instead of the actual error.
      maruku_error "Could not produce MathML TeX: \n#{tex}"+
        "\n\n #{e.inspect}"
    end
    nil
  end

end end end

View file

@ -0,0 +1,20 @@
module MaRuKu; module Out; module HTML

  # Fallback "engine": shows the TeX source verbatim inside a <code> element.
  #
  # An engine can: either return a REXML::Element
  #    return Element.new 'div'
  # or return an empty array on error
  #    return []
  # or have a string parsed by REXML (done here).
  def convert_to_mathml_none(kind, tex)
    # Escape both "&" and "<": TeX such as `a < b` would otherwise produce
    # markup that REXML cannot parse. "&" must be replaced first.
    tex = tex.gsub('&','&amp;').gsub('<','&lt;')
    mathml = "<code>#{tex}</code>"
    return Document.new(mathml).root
  end

  # Fallback PNG "engine": produces no image.
  def convert_to_png_none(kind, tex)
    return nil
  end

end end end

View file

@ -0,0 +1,24 @@
module MaRuKu; module Out; module HTML

  # Converts +tex+ to MathML with the `ritex` library, which is loaded on
  # first use.
  #
  # Returns the root element of the parsed MathML, or nil on failure
  # (reported via maruku_error).
  def convert_to_mathml_ritex(kind, tex)
    begin
      if not $ritex_parser
        require 'ritex'
        $ritex_parser = Ritex::Parser.new
      end

      mathml = $ritex_parser.parse(tex.strip)
      doc = Document.new(mathml, {:respect_whitespace =>:all}).root
      return doc
    rescue LoadError => e
      maruku_error "Could not load package 'ritex'.\n"+
        "Please install it using:\n"+
        " $ gem install ritex\n\n"+e.inspect
    rescue REXML::ParseException => e
      # Output that is not well-formed XML is reported like any other
      # failure, as the itex2mml engine does, instead of propagating.
      maruku_error "Invalid MathML produced for TeX: \n#{tex}"+
        "\n\n #{e.inspect}"
    rescue Racc::ParseError => e
      maruku_error "Could not parse TeX: \n#{tex}"+
        "\n\n #{e.inspect}"
    end
    nil
  end

end end end

View file

@ -0,0 +1,82 @@
module MaRuKu
  class MDDocument
    # Maps an equation id (String) to its equation element (MDElement).
    attr_reader :eqid2eq
    attr_writer :eqid2eq
  end
end
# At least one slash inside
#RegInlineMath1 = /\$([^\$]*[\\][^\$]*)\$/
# No spaces around the delimiters
#RegInlineMath2 = /\$([^\s\$](?:[^\$]*[^\s\$])?)\$/
#RegInlineMath = Regexp::union(RegInlineMath1,RegInlineMath2)
# Everything goes; takes care of escaping the "\$" inside the expression.
# The escaped-dollar alternative must be tried first: with `[^\$]` first the
# backslash was consumed on its own and the following "$" closed the formula.
RegInlineMath = /\${1}((?:\\\$|[^\$])+)\$/
# Span extension for inline math: tries to read a $...$ expression at the
# current position. On success an :inline_math element is pushed and true
# is returned; otherwise false.
MaRuKu::In::Markdown.register_span_extension(
  :chars => ?$, :regexp => RegInlineMath
) do |doc, src, con|
  found = src.read_regexp(RegInlineMath)
  if found
    con.push doc.md_inline_math(found.captures.compact.first)
  end
  #puts "not math: #{src.cur_chars 10}"
  found ? true : false
end
# A line that opens an equation: up to three spaces, then "\[" or "$$".
EquationStart = /^[ ]{0,3}(?:\\\[|\$\$)(.*)$/

# An optional equation label in parentheses: "(name)".
EqLabel = /(?:\((\w+)\))/

# An equation opened and closed on the same line, with an optional label.
OneLineEquation = /^[ ]{0,3}(?:\\\[|\$\$)(.*)(?:\\\]|\$\$)\s*#{EqLabel}?\s*$/

# A line that closes an equation ("\]" or "$$"), with an optional label.
EquationEnd = /^(.*)(?:\\\]|\$\$)\s*#{EqLabel}?\s*$/

# Block extension for displayed equations. Reads either a one-line equation
# or all lines up to the closing delimiter, and pushes an :equation element.
MaRuKu::In::Markdown::
register_block_extension(:regexp => EquationStart) do |doc, src, con|
  # puts "Equation :#{self}"
  first = src.shift_line
  if first =~ OneLineEquation
    con.push doc.md_equation($1, $2)
  else
    first =~ EquationStart
    math = $1
    label = nil
    while true
      if not src.cur_line
        # Report through the document passed to the block: `self` in this
        # top-level block is not an element and has no maruku_error.
        # Every line of the unfinished equation is prefixed with '$> '.
        partial = math.gsub(/^/, '$> ')
        doc.maruku_error("Stream finished while reading equation\n\n" + partial,
                         src, con)
        break
      end
      line = src.shift_line
      if line =~ EquationEnd
        math += $1 + "\n"
        label = $2 if $2
        break
      else
        math += line + "\n"
      end
    end
    con.push doc.md_equation(math, label)
  end
  true
end
# This adds support for \eqref
RegEqrefLatex = /\\eqref\{(\w+)\}/
RegEqPar = /\(eq:(\w+)\)/
RegEqref = Regexp::union(RegEqrefLatex, RegEqPar)

# Span extension for equation references, written either as \eqref{id} or
# as (eq:id). Pushes an :eqref element and returns true; returns false when
# nothing could be read, like the inline-math extension does (the original
# called `captures` on a possibly nil result).
MaRuKu::In::Markdown::
register_span_extension(:chars => [?\\, ?(], :regexp => RegEqref) do
|doc, src, con|
  if m = src.read_regexp(RegEqref)
    eqid = m.captures.compact.first
    con.push doc.md_el(:eqref, [], {:eqid=>eqid})
    true
  else
    false
  end
end

View file

@ -0,0 +1,170 @@
=begin maruku_doc
Extension: math
Attribute: html_math_engine
Scope: document, element
Output: html
Summary: Select the rendering engine for MathML.
Default: <?mrk Globals[:html_math_engine].to_s ?>
Select the rendering engine for math.
If you want to use your custom engine `foo`, then set:
HTML math engine: foo
{:lang=markdown}
and then implement two functions:
def convert_to_mathml_foo(kind, tex)
...
end
=end
=begin maruku_doc
Extension: math
Attribute: html_png_engine
Scope: document, element
Output: html
Summary: Select the rendering engine for PNG images of math.
Default: <?mrk Globals[:html_png_engine].to_s ?>
Same thing as `html_math_engine`, only for PNG output.
def convert_to_png_foo(kind, tex)
# same thing
...
end
{:lang=ruby}
=end
module MaRuKu; module Out; module HTML
# Creates the MathML for +tex+ using the engine named by the
# :html_math_engine setting, i.e. the method convert_to_mathml_<engine>.
# Falls back to convert_to_mathml_none when that method does not exist
# (a message is printed) or returns nothing.
def render_mathml(kind, tex)
  engine = get_setting(:html_math_engine)
  method = "convert_to_mathml_#{engine}".to_sym
  unless self.respond_to? method
    puts "A method called #{method} should be defined."
    return convert_to_mathml_none(kind, tex)
  end
  self.send(method, kind, tex) || convert_to_mathml_none(kind, tex)
end
# Renders +tex+ with the engine named by the :html_png_engine setting,
# i.e. the method convert_to_png_<engine>, and returns its result. Returns
# nil (after printing a message) when that method does not exist.
def render_png(kind, tex)
  engine = get_setting(:html_png_engine)
  method = "convert_to_png_#{engine}".to_sym
  return self.send(method, kind, tex) if self.respond_to? method

  puts "A method called #{method} should be defined."
  nil
end
# Height of the rendered letter "x", taken as the size of one `ex`. The
# value is computed on first use and cached in $pixels_per_ex.
def pixels_per_ex
  $pixels_per_ex ||= render_png(:inline, "x").height # + x.depth
end
# Builds the <img> element for a rendered formula. Height and depth of
# +png+ are divided by pixels_per_ex and written into the inline style in
# `ex` units; with +use_depth+ the image is also shifted down by its depth.
def adjust_png(png, use_depth)
  ex = pixels_per_ex
  height_in_ex = png.height / ex
  depth_in_ex = png.depth / ex
  total_height_in_ex = height_in_ex + depth_in_ex

  shift = use_depth ? "vertical-align: -#{depth_in_ex}ex;" : ""
  style = shift + "height: #{total_height_in_ex}ex;"

  img = Element.new 'img'
  img.attributes['src'] = png.src
  img.attributes['style'] = style
  img.attributes['alt'] = "equation"
  img
end
# HTML for inline math: a <span class="maruku-inline"> holding the MathML
# rendering and/or the PNG rendering, whichever is available.
def to_html_inline_math
  mathml = render_mathml(:inline, self.math)
  png = render_png(:inline, self.math)

  wrapper = create_html_element 'span'
  add_class_to(wrapper, 'maruku-inline')

  if mathml
    add_class_to(mathml, 'maruku-mathml')
    wrapper << mathml
  end

  if png
    image = adjust_png(png, true)
    add_class_to(image, 'maruku-png')
    wrapper << image
  end
  wrapper
end
# HTML for a displayed equation: a <div class="maruku-equation"> holding,
# in this order, the equation number (only for labelled equations, which
# also get the id "eq:<label>"), the MathML rendering, the PNG rendering,
# and a <div class="maruku-eq-tex"> with the hidden TeX source.
def to_html_equation
  mathml = render_mathml(:equation, self.math)
  png = render_png(:equation, self.math)

  container = create_html_element 'div'
  add_class_to(container, 'maruku-equation')

  if self.label # then numerate
    number = Element.new 'span'
    number.attributes['class'] = 'maruku-eq-number'
    number << Text.new("(#{self.num})")
    container << number
    container.attributes['id'] = "eq:#{self.label}"
  end

  if mathml
    add_class_to(mathml, 'maruku-mathml')
    container << mathml
  end

  if png
    image = adjust_png(png, false)
    add_class_to(image, 'maruku-png')
    container << image
  end

  tex_div = Element.new 'div'
  add_class_to(tex_div, 'maruku-eq-tex')
  tex_code = convert_to_mathml_none(:equation, self.math)
  tex_code.attributes['style'] = 'display: none'
  tex_div << tex_code
  container << tex_div
  container
end
# HTML for a reference to a numbered equation.
#
# Looks the equation up in doc.eqid2eq by this element's eqid. When found,
# returns an <a class="maruku-eqref" href="#eq:ID"> whose text is "(num)".
# Otherwise reports the problem through maruku_error and returns a plain
# text node "(eq:ID)".
def to_html_eqref
  target = self.doc.eqid2eq[self.eqid]
  unless target
    maruku_error "Cannot find equation #{self.eqid.inspect}"
    return Text.new("(eq:#{self.eqid})")
  end
  number = target.num
  link = Element.new 'a'
  link.attributes['class'] = 'maruku-eqref'
  link.attributes['href'] = "#eq:#{self.eqid}"
  link << Text.new("(#{number})")
  link
end
end end end

View file

@ -0,0 +1,21 @@
module MaRuKu
  module Out
    # LaTeX output for the math elements. The including object must
    # provide +math+, +label+ and +eqid+.
    module Latex
      # Inline math: the stripped TeX source between dollar signs.
      def to_latex_inline_math
        '$' + self.math.strip + '$'
      end

      # Displayed math. A labelled equation becomes an +equation+
      # environment carrying a \label; an unlabelled one becomes a
      # +displaymath+ environment.
      def to_latex_equation
        unless self.label
          return "\\begin{displaymath}\n#{self.math.strip}\n\\end{displaymath}\n"
        end
        label_cmd = "\\label{#{self.label}}"
        "\\begin{equation}\n#{self.math.strip}\n#{label_cmd}\\end{equation}\n"
      end

      # Reference to a labelled equation.
      def to_latex_eqref
        "\\eqref{" + "#{self.eqid}" + "}"
      end
    end
  end
end

View file

@ -0,0 +1,259 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
# A series of helper functions for creating elements: they hide the
# particular internal representation.
#
# Please, always use these instead of creating MDElement.
#
module MaRuKu
  # Factory methods for building MDElement nodes. Each md_* helper wraps
  # md_el with the node type and meta keys of one kind of element; the
  # optional trailing +al+ argument is the element's attribute list.
  module Helpers
    # Generic constructor used by all the other helpers.
    #
    # If the first child is an :ial element it is removed from +children+
    # (the array is modified in place) and its attribute list is used as,
    # or appended to, +al+. The new element's +doc+ is set to @doc.
    def md_el(node_type, children=[], meta={}, al=nil)
      first = children.first
      if first.kind_of?(MDElement) && first.node_type == :ial
        al = al ? al + first.ial : first.ial
        children.shift
      end
      e = MDElement.new(node_type, children, meta, al)
      e.doc = @doc
      e
    end

    def md_header(level, children, al=nil)
      md_el(:header, children, {:level => level}, al)
    end

    # Inline code
    def md_code(code, al=nil)
      md_el(:inline_code, [], {:raw_code => code}, al)
    end

    # Code block
    def md_codeblock(source, al=nil)
      md_el(:code, [], {:raw_code => source}, al)
    end

    def md_quote(children, al=nil)
      md_el(:quote, children, {}, al)
    end

    def md_li(children, want_my_par, al=nil)
      md_el(:li, children, {:want_my_paragraph=>want_my_par}, al)
    end

    def md_footnote(footnote_id, children, al=nil)
      md_el(:footnote, children, {:footnote_id=>footnote_id}, al)
    end

    def md_abbr_def(abbr, text, al=nil)
      md_el(:abbr_def, [], {:abbr=>abbr, :text=>text}, al)
    end

    def md_abbr(abbr, title)
      md_el(:abbr, [abbr], {:title=>title})
    end

    # Raw HTML block. The HTML is also parsed with REXML (wrapped in a
    # <marukuwrap> element) and the document stored in the element's
    # @parsed_html; when parsing fails the element is returned without it.
    #
    # +al+ is forwarded to md_el like in every other helper (it used to be
    # accepted but silently dropped).
    def md_html(raw_html, al=nil)
      e = md_el(:raw_html, [], {:raw_html=>raw_html}, al)
      begin
        # remove newlines and whitespace at begin
        # end end of string, or else REXML gets confused
        raw_html = raw_html.gsub(/\A\s*</,'<').
          gsub(/>[\s\n]*\Z/,'>')
        raw_html = "<marukuwrap>#{raw_html}</marukuwrap>"
        e.instance_variable_set :@parsed_html,
          REXML::Document.new(raw_html)
      rescue
        # Deliberately best-effort: malformed HTML just leaves
        # @parsed_html unset.
        # tell_user "Malformed block of HTML:\n"+
        # add_tabs(raw_html,1,'|')
        # " #{raw_html.inspect}\n\n"+ex.inspect
      end
      e
    end

    # Reference-style link; the reference id is stored downcased.
    def md_link(children, ref_id, al=nil)
      md_el(:link, children, {:ref_id=>ref_id.downcase}, al)
    end

    def md_im_link(children, url, title=nil, al=nil)
      md_el(:im_link, children, {:url=>url,:title=>title}, al)
    end

    def md_image(children, ref_id, al=nil)
      md_el(:image, children, {:ref_id=>ref_id}, al)
    end

    def md_im_image(children, url, title=nil, al=nil)
      md_el(:im_image, children, {:url=>url,:title=>title},al)
    end

    # +children+ may be a single element or an array.
    def md_em(children, al=nil)
      md_el(:emphasis, [children].flatten, {}, al)
    end

    def md_br()
      md_el(:linebreak, [], {}, nil)
    end

    def md_hrule()
      md_el(:hrule, [], {}, nil)
    end

    # +children+ may be a single element or an array.
    def md_strong(children, al=nil)
      md_el(:strong, [children].flatten, {}, al)
    end

    # Strong wrapping an emphasis; +al+ goes on the outer strong element.
    def md_emstrong(children, al=nil)
      md_strong(md_em(children), al)
    end

    # <http://www.example.com/>
    def md_url(url, al=nil)
      md_el(:immediate_link, [], {:url=>url}, al)
    end

    # <andrea@rubyforge.org>
    # <mailto:andrea@rubyforge.org>
    def md_email(email, al=nil)
      md_el(:email_address, [], {:email=>email}, al)
    end

    def md_entity(entity_name, al=nil)
      md_el(:entity, [], {:entity_name=>entity_name}, al)
    end

    # Markdown extra
    def md_foot_ref(ref_id, al=nil)
      md_el(:footnote_reference, [], {:footnote_id=>ref_id}, al)
    end

    def md_par(children, al=nil)
      md_el(:paragraph, children, {}, al)
    end

    # [1]: http://url [properties]
    #
    # :url, :ref_id and (when given) :title are written into +meta+, so a
    # hash passed by the caller is modified.
    def md_ref_def(ref_id, url, title=nil, meta={}, al=nil)
      meta[:url] = url
      meta[:ref_id] = ref_id
      meta[:title] = title if title
      md_el(:ref_definition, [], meta, al)
    end

    # inline attribute list; +al+ is wrapped in an AttributeList unless it
    # already is one.
    def md_ial(al)
      al = Maruku::AttributeList.new(al) unless al.kind_of?(Maruku::AttributeList)
      md_el(:ial, [], {:ial=>al})
    end

    # Attribute list definition
    def md_ald(id, al)
      md_el(:ald, [], {:ald_id=>id,:ald=>al})
    end

    # Server directive <?target code... ?>
    def md_xml_instr(target, code)
      md_el(:xml_instr, [], {:target=>target, :code=>code})
    end
  end
end
module MaRuKu
  class MDElement
    # Abbreviated inspect: renders the element as the md_* helper call that
    # would build it (meant to be eval()uable to get the document back).
    #
    # Returns nil for node types that have no abbreviated form. When the
    # element has a non-empty attribute list, its inspect is appended as a
    # last argument.
    def inspect2
      call =
        case @node_type
        when :paragraph then "md_par(%s)" % children_inspect
        when :footnote_reference then "md_foot_ref(%s)" % self.footnote_id.inspect
        when :entity then "md_entity(%s)" % self.entity_name.inspect
        when :email_address then "md_email(%s)" % self.email.inspect
        when :inline_code then "md_code(%s)" % self.raw_code.inspect
        when :raw_html then "md_html(%s)" % self.raw_html.inspect
        when :emphasis then "md_em(%s)" % children_inspect
        when :strong then "md_strong(%s)" % children_inspect
        when :immediate_link then "md_url(%s)" % self.url.inspect
        when :image
          "md_image(%s, %s)" % [children_inspect, self.ref_id.inspect]
        when :im_image
          "md_im_image(%s, %s, %s)" % [children_inspect, self.url.inspect, self.title.inspect]
        when :link
          "md_link(%s,%s)" % [children_inspect, self.ref_id.inspect]
        when :im_link
          "md_im_link(%s, %s, %s)" % [children_inspect, self.url.inspect, self.title.inspect]
        when :ref_definition
          "md_ref_def(%s, %s, %s)" % [self.ref_id.inspect, self.url.inspect, self.title.inspect]
        when :ial then "md_ial(%s)" % self.ial.inspect
        end
      return nil if call.nil?
      return call unless @al && !@al.empty?
      # drop the closing parenthesis and add the attribute list argument
      call.chop + ", #{@al.inspect})"
    end
  end
end

View file

@ -0,0 +1,326 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu; module In; module Markdown; module SpanLevelParser
# Forward declarations: the three interchangeable character-source
# implementations are declared empty here so that the CharSource constant
# can be bound before their bodies are defined further down.
#
# A hand-written string scanner.
class CharSourceManual; end
# A wrapper around StringScanner.
class CharSourceStrscan; end
# A debug scanner that checks the correctness of both
# by comparing their output.
class CharSourceDebug; end
# Choose the implementation used by the rest of the parser.
CharSource = CharSourceManual # faster! 58ms vs. 65ms
#CharSource = CharSourceStrscan
#CharSource = CharSourceDebug
# Character source implemented by hand: a String plus an index into it.
class CharSourceManual
  include MaRuKu::Strings

  # s:: the String to scan (anything else raises)
  # parent:: optional enclosing source, only used by #describe
  def initialize(s, parent=nil)
    raise "Passed #{s.class}" if not s.kind_of? String
    @buffer = s
    @buffer_index = 0
    @parent = parent
  end

  # Return the current char, i.e. @buffer[@buffer_index] (nil at the end).
  # String#[] yields an Integer on Ruby 1.8 and a one-char String later.
  def cur_char; @buffer[@buffer_index] end

  # Return the next n chars as a String.
  def cur_chars(n); @buffer[@buffer_index,n] end

  # Return the char after the current char (or nil).
  def next_char; @buffer[@buffer_index+1] end

  # Return the current char and advance by one.
  def shift_char
    c = @buffer[@buffer_index]
    @buffer_index+=1
    c
  end

  # Advance by one char; returns nil.
  def ignore_char
    @buffer_index+=1
    nil
  end

  # Advance by n chars; returns nil.
  def ignore_chars(n)
    @buffer_index+=n
    nil
  end

  # Everything from the current position to the end of the buffer.
  def current_remaining_buffer
    @buffer[@buffer_index, @buffer.size-@buffer_index]
  end

  # True if the buffer continues with exactly +string+.
  def cur_chars_are(string)
    cur_chars(string.size) == string
  end

  # True if +r+ matches right at the current position.
  #
  # The pattern is anchored with \A: the previous ^ also matches after any
  # newline, so a multi-line buffer could report a match located at a
  # later line instead of at the cursor.
  def next_matches(r)
    r2 = /\A.{#{@buffer_index}}#{r}/m
    md = r2.match @buffer
    return !!md
  end

  # Like read_regexp, but matches against the whole buffer (skipping the
  # already-consumed prefix) instead of the remaining part.
  def read_regexp3(r)
    r2 = /\A.{#{@buffer_index}}#{r}/m
    m = r2.match @buffer
    if m
      consumed = m.to_s.size - @buffer_index
      ignore_chars consumed
    end
    m
  end

  # If +r+ matches at the current position, consume the matched text.
  # Returns the MatchData (or nil, leaving the position unchanged).
  # Anchored with \A for the same reason as next_matches.
  def read_regexp(r)
    r2 = /\A#{r}/
    rest = current_remaining_buffer
    m = r2.match(rest)
    if m
      @buffer_index += m.to_s.size
    end
    return m
  end

  # Skip spaces and tabs (newlines are not skipped).
  # The ?\s / ?\t literals compare correctly whether cur_char yields an
  # Integer or a one-char String; a bare 32 only worked in the former case.
  def consume_whitespace
    while c = cur_char
      break unless c == ?\s || c == ?\t
      ignore_char
    end
  end

  # Append to +out+ the run of ASCII letters starting at the current
  # position, consuming them.
  def read_text_chars(out)
    s = @buffer.size; c=nil
    while @buffer_index < s && (c=@buffer[@buffer_index]) &&
        ((c>=?a && c<=?z) || (c>=?A && c<=?Z))
      out << c
      @buffer_index += 1
    end
  end

  # Human-readable description of the current position, followed by the
  # parent's description when there is a parent.
  def describe
    s = describe_pos(@buffer, @buffer_index)
    if @parent
      s += "\n\n" + @parent.describe
    end
    s
  end

  include SpanLevelParser
end
# Builds a multi-line diagnostic showing where +buffer_index+ falls inside
# +buffer+: a window of at most 75 characters around the position (newlines
# shown as N, tabs as T, "EOF" appended when the window reaches the end), a
# marker line pointing at the position, and finally the whole buffer
# formatted by add_tabs.
def describe_pos(buffer, buffer_index)
  len = 75
  room_before = buffer_index
  room_after = buffer.size - buffer_index
  # start from half a window after the position, then let each side take
  # whatever the other one leaves unused
  num_after = [len / 2, room_after].min
  num_before = [room_before, len - num_after].min
  num_after = [room_after, len - num_before].min

  index_start = [buffer_index - num_before, 0].max
  index_end = [buffer_index + num_after, buffer.size].min
  size = index_end - index_start

  str = buffer[index_start, size].tr("\n\t", 'NT')
  str += "EOF" if index_end == buffer.size

  pre_s = [buffer_index - index_start, 0].max
  pre_s2 = [len - pre_s, 0].max
  pre = " " * pre_s

  header = "#{'-' * len}\n" \
           "#{str}\n" \
           "#{'-' * pre_s}|#{'-' * pre_s2}\n" \
           "#{pre}+--- Byte #{buffer_index}\n" \
           "Shown bytes [#{index_start} to #{size}] of #{buffer.size}:\n"
  header + add_tabs(buffer, 1, ">")
end
require 'strscan'
# Character source backed by a StringScanner.
# NOTE(review): unlike CharSourceManual, consume_whitespace here skips any
# \s (newlines included) and describe ignores the parent -- confirm whether
# that difference is intended.
class CharSourceStrscan
  include SpanLevelParser
  include MaRuKu::Strings

  # s:: the String to scan
  # parent:: optional enclosing source (stored, not otherwise used here)
  def initialize(s, parent=nil)
    @s = StringScanner.new(s)
    @parent = parent
  end

  # Return the current char, taken from a one-char peek (nil at the end).
  def cur_char; @s.peek(1)[0] end

  # Return the next n chars as a String.
  def cur_chars(n); @s.peek(n) end

  # Return the char after the current char (or nil).
  def next_char; @s.peek(2)[1] end

  # Consume one byte and return its first char.
  def shift_char; (@s.get_byte)[0] end

  # Consume one byte; returns nil.
  def ignore_char
    @s.get_byte
    nil
  end

  # Consume n bytes; returns nil.
  def ignore_chars(n)
    n.times { @s.get_byte }
    nil
  end

  # The part of the string that has not been consumed yet.
  def current_remaining_buffer
    @s.rest
  end

  # True if the input continues with exactly +string+.
  def cur_chars_are(string)
    string == cur_chars(string.size)
  end

  # True if +r+ matches at the current position (nothing is consumed).
  def next_matches(r)
    @s.match?(r) ? true : false
  end

  # Consume +r+ at the current position; returns the MatchData obtained by
  # matching +r+ against the consumed text, or nil if nothing matched.
  def read_regexp(r)
    consumed = @s.scan(r)
    return nil unless consumed
    r.match(consumed)
  end

  # Skip a run of whitespace; returns nil.
  def consume_whitespace
    @s.scan(/\s+/)
    nil
  end

  # Human-readable description of the current position.
  def describe
    describe_pos(@s.string, @s.pos)
  end
end
# Debugging character source: forwards every call to both a
# CharSourceManual and a CharSourceStrscan and compares the results.
# On any disagreement it prints a report and terminates the process.
class CharSourceDebug
  # Same signature as the two real sources: +parent+ is optional, so this
  # class can be swapped in as CharSource without touching the callers.
  def initialize(s, parent=nil)
    @a = CharSourceManual.new(s, parent)
    @b = CharSourceStrscan.new(s, parent)
  end

  # Runs +methodname+ on both sources. MatchData results are compared by
  # their captures, everything else with !=; afterwards both sources must
  # be at the same current char. Returns the manual source's result.
  def method_missing(methodname, *args)
    a_bef = @a.describe
    b_bef = @b.describe
    a = @a.send(methodname, *args)
    b = @b.send(methodname, *args)
    if a.kind_of? MatchData
      if a.to_a != b.to_a
        puts "called: #{methodname}(#{args})"
        puts "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}"
        puts "AFTER: "+@a.describe
        puts "AFTER: "+@b.describe
        puts "BEFORE: "+a_bef
        puts "BEFORE: "+b_bef
        puts caller.join("\n")
        exit
      end
    else
      if a!=b
        puts "called: #{methodname}(#{args})"
        puts "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}"
        puts ""+@a.describe
        puts ""+@b.describe
        puts caller.join("\n")
        exit
      end
    end
    if @a.cur_char != @b.cur_char
      puts "Fuori sincronia dopo #{methodname}(#{args})"
      puts ""+@a.describe
      puts ""+@b.describe
      exit
    end
    return a
  end

  # Keeps respond_to? truthful for the methods handled by method_missing.
  def respond_to_missing?(methodname, include_private = false)
    @a.respond_to?(methodname, include_private) || super
  end
end
end end end end

View file

@ -0,0 +1,68 @@
module MaRuKu
  module In
    module Markdown
      # Hash trigger char -> array of SpanExtension
      SpanExtensionsTrigger = {}

      # One registered span-level extension.
      class SpanExtension
        # chars: the triggering chars
        # regexp: the triggering regexp
        # block: the handler, called with (doc, src, con)
        attr_accessor :chars, :regexp, :block
      end

      # Hash String -> Extension
      SpanExtensions = {}

      # Hash Regexp -> Block
      BlockExtensions = {}

      # Registers a span extension. args[:chars] is one trigger char or an
      # array of them, args[:regexp] the pattern that must match at the
      # current position; the block is the handler.
      def self.register_span_extension(args, &block)
        extension = SpanExtension.new
        extension.chars = [*args[:chars]]
        extension.regexp = args[:regexp]
        extension.block = block
        extension.chars.each do |trigger|
          SpanExtensionsTrigger[trigger] ||= []
          SpanExtensionsTrigger[trigger].push extension
        end
      end

      # Registers a block extension: the block becomes the handler for
      # lines matching args[:regexp] (replacing a previous handler
      # registered for the same regexp).
      def self.register_block_extension(args, &block)
        BlockExtensions[args[:regexp]] = block
      end

      # Offers the current position of +src+ to the span extensions
      # triggered by its current char, in registration order. Returns true
      # as soon as one whose regexp matches has a handler returning a true
      # value; false otherwise (not special).
      def check_span_extensions(src, con)
        candidates = SpanExtensionsTrigger[src.cur_char] || []
        candidates.any? do |extension|
          extension.regexp &&
            src.next_matches(extension.regexp) &&
            extension.block.call(doc, src, con)
        end
      end

      # Offers +line+ to every block extension whose regexp matches it.
      # Returns true as soon as a handler returns a true value; false
      # otherwise (not special).
      def check_block_extensions(src, con, line)
        BlockExtensions.any? do |pattern, _handler|
          pattern.match(line) && BlockExtensions[pattern].call(doc, src, con)
        end
      end

      # Returns the MatchData of the first registered block regexp that
      # matches +line+, or false when none does.
      def any_matching_block_extension?(line)
        BlockExtensions.each_key do |pattern|
          hit = pattern.match(line)
          return hit if hit
        end
        false
      end
    end
  end
end

View file

@ -0,0 +1,175 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu; module In; module Markdown; module SpanLevelParser
# This class helps me read and sanitize HTML blocks
# I tried to do this with REXML, but wasn't able to. (suggestions?)
#
# Usage: feed the text piece by piece with eat_this, ask is_finished? to
# know whether every opened tag has been closed, and collect the processed
# text with stuff_you_read. Malformed input makes eat_this raise (see error).
class HTMLHelper
include MaRuKu::Strings
# A complete tag: [1] is "/" for a closing tag, [2] the tag name,
# [3] the attributes (including the trailing "/" of a self-closed tag).
Tag = %r{^<(/)?(\w+)\s*([^>]*)>}m
# A "<" whose closing ">" has not been read yet.
PartialTag = %r{^<.*}m
# A run of text containing no "<".
EverythingElse = %r{^[^<]+}m
CommentStart = %r{^<!--}x
CommentEnd = %r{^.*-->}
# Tags that are always re-emitted in self-closed form (see handle_tag).
TO_SANITIZE = ['img','hr']
# Input received but not yet processed.
attr_reader :rest
def initialize
@rest = ""
# names of the currently open tags, innermost last
@tag_stack = []
# MatchData of the last successful match
@m = nil
# text processed so far
@already = ""
self.state = :inside_element
end
attr_accessor :state # :inside_element, :inside_tag, :inside_comment,
# Consumes +line+ (prepended to whatever is still unprocessed) and advances
# the state machine until the input is exhausted, or until at least one
# tag/comment start has been read in this call and is_finished? holds.
def eat_this(line)
@rest = line + @rest
things_read = 0
until @rest.empty?
case self.state
when :inside_comment
# look for the end of the comment; otherwise swallow everything
if @m = CommentEnd.match(@rest)
@already += @m.pre_match + @m.to_s
@rest = @m.post_match
self.state = :inside_element
else
@already += @rest
@rest = ""
self.state = :inside_comment
end
when :inside_element
# order matters: comment start, complete tag, partial tag, plain text
if @m = CommentStart.match(@rest)
things_read += 1
@already += @m.pre_match + @m.to_s
@rest = @m.post_match
self.state = :inside_comment
elsif @m = Tag.match(@rest) then
things_read += 1
handle_tag
self.state = :inside_element
elsif @m = PartialTag.match(@rest) then
# keep the incomplete tag aside until its ">" arrives
@already += @m.pre_match
@rest = @m.post_match
@partial_tag = @m.to_s
self.state = :inside_tag
elsif @m = EverythingElse.match(@rest)
@already += @m.pre_match + @m.to_s
@rest = @m.post_match
self.state = :inside_element
else
error "Malformed HTML: not complete: #{@rest.inspect}"
end
when :inside_tag
if @m = /^[^>]*>/.match(@rest) then
# the tag is now complete: push it back so that it is parsed as a Tag
@partial_tag += @m.to_s
@rest = @partial_tag + @m.post_match
@partial_tag = nil
self.state = :inside_element
else
@partial_tag += @rest
@rest = ""
self.state = :inside_tag
end
else
raise "Bug bug: state = #{self.state.inspect}"
end # not inside comment
# puts inspect
# puts "Read: #{@tag_stack.inspect}"
break if is_finished? and things_read>0
end
end
# Processes the tag just matched in @m: tags listed in TO_SANITIZE are
# rewritten as "<tag attrs />"; a closing tag must match the innermost open
# tag and pops it; any other tag is copied and, unless self-closed, pushed
# on the stack.
def handle_tag()
@already += @m.pre_match
@rest = @m.post_match
is_closing = !!@m[1]
tag = @m[2]
attributes = @m[3]
is_single = false
# a trailing "/" marks a self-closed tag
if attributes =~ /\A(.*)\/\Z/
attributes = $1
is_single = true
end
# puts "READ TAG #{@m.to_s.inspect} tag = #{tag} closing? #{is_closing} single = #{is_single}"
if TO_SANITIZE.include? tag
attributes.strip!
# puts "Attributes: #{attributes.inspect}"
if attributes.size > 0
@already += '<%s %s />' % [tag, attributes]
else
@already += '<%s />' % [tag]
end
elsif is_closing
@already += @m.to_s
if @tag_stack.empty?
error "Malformed: closing tag #{tag.inspect} "+
"in empty list"
end
if @tag_stack.last != tag
error "Malformed: tag <#{tag}> "+
"closes <#{@tag_stack.last}>"
end
@tag_stack.pop
else
@already += @m.to_s
@tag_stack.push(tag) unless is_single
end
end
# Raises an Exception carrying +s+ and a dump of the reader state.
# Note that this is Exception itself, not a StandardError: a bare
# `rescue` will not catch it.
def error(s)
raise Exception, "Error: #{s} \n"+ inspect, caller
end
# Dump of the reader state: current state, last match, open tags, and the
# text before/after the current position.
def inspect; "HTML READER\n state=#{self.state} "+
"match=#{@m.to_s.inspect}\n"+
"Tag stack = #{@tag_stack.inspect} \n"+
"Before:\n"+
add_tabs(@already,1,'|')+"\n"+
"After:\n"+
add_tabs(@rest,1,'|')+"\n"
end
# The text processed so far.
def stuff_you_read
@already
end
# True when no tag is open and the reader is neither inside a comment nor
# inside a partial tag.
def is_finished?
(self.state == :inside_element) and @tag_stack.empty?
end
end # html helper
end end end end

View file

@ -0,0 +1,110 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu; module In; module Markdown; module BlockLevelParser
# A cursor over an array of lines, consumed one line at a time.
#
# It is the twin of CharSource.
#
class LineSource
  include MaRuKu::Strings

  # lines:: the array of lines to walk (must not be nil)
  # parent:: optional LineSource these lines were taken from
  # parent_offset:: index in +parent+ used to compute original line numbers
  def initialize(lines, parent=nil, parent_offset=nil)
    raise "NIL lines? " unless lines
    @lines = lines
    @lines_index = 0
    @parent = parent
    @parent_offset = parent_offset
  end

  # The line under the cursor (nil past the end).
  def cur_line
    @lines[@lines_index]
  end

  # The line after the one under the cursor (nil past the end).
  def next_line
    @lines[@lines_index + 1]
  end

  # Returns the current line and moves to the next one.
  # Raises when already past the end.
  def shift_line
    raise "Over the rainbow" if @lines_index >= @lines.size
    @lines_index += 1
    @lines[@lines_index - 1]
  end

  # Moves to the next line. Raises when already past the end.
  def ignore_line
    raise "Over the rainbow" if @lines_index >= @lines.size
    @lines_index += 1
  end

  # Human-readable description of the current position: the original line
  # number followed by up to 3 lines of context on each side, each shown
  # with its md_type; the current line is marked with an arrow.
  def describe
    window = 3 # lines
    first = [@lines_index - window, 0].max
    last = [@lines_index + window, @lines.size - 1].min
    heading = "At line #{original_line_number(@lines_index)}\n"
    (first..last).inject(heading) do |text, i|
      marker = i == @lines_index ? '--> ' : ' '
      line = @lines[i]
      text + ("%10s %4s|%s" % [line.md_type.to_s, marker, line]) + "|\n"
    end
  end

  # Translates +index+ into a line number of the outermost source:
  # 1-based at the top level, otherwise offset by the parent's number for
  # @parent_offset.
  def original_line_number(index)
    return 1 + index unless @parent
    index + @parent.original_line_number(@parent_offset)
  end

  # Index of the line under the cursor.
  def cur_index
    @lines_index
  end

  # Returns the types of the upcoming lines as a string, one letter per
  # line: t(ext), e(mpty), d(efinition), o(ther).
  # Stops after the first definition or the second empty line.
  def tell_me_the_future
    future = ""
    empties = 0
    (@lines_index...@lines.size).each do |i|
      letter =
        case @lines[i].md_type
        when :text then "t"
        when :empty
          empties += 1
          "e"
        when :definition then "d"
        else "o"
        end
      future += letter
      break if letter == "d" || empties > 1
    end
    future
  end
end # linesource
end end end end # block

View file

@ -0,0 +1,596 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu; module In; module Markdown; module BlockLevelParser
include Helpers
include MaRuKu::Strings
include MaRuKu::In::Markdown::SpanLevelParser
# The list of block-level elements produced so far by the parser.
class BlockContext < Array
  # Description listing the inspect of the last (at most) 5 elements,
  # one per line.
  def describe
    shown = 5
    last(shown).inject("Last #{shown} elements: ") do |text, item|
      text + "\n -" + item.inspect
    end
  end
end
# Splits +text+ into lines, wraps them in a LineSource and returns the
# result of parse_blocks on it.
def parse_text_as_markdown(text)
  parse_blocks(LineSource.new(split_lines(text)))
end
# Input is a LineSource
#
# Main block-level loop: looks at the md_type of the current line, lets the
# matching read_* method consume the lines of that block and collects the
# resulting elements. Returns the BlockContext holding the parsed elements.
def parse_blocks(src)
output = BlockContext.new
# run state machine
while src.cur_line
# Registered block extensions get the first chance on every line.
# NOTE(review): this relies on an extension that returns true having
# consumed at least one line -- otherwise the loop would not advance.
next if check_block_extensions(src, output, src.cur_line)
# Prints detected type (useful for debugging)
# puts "#{src.cur_line.md_type}|#{src.cur_line}"
case src.cur_line.md_type
when :empty;
# keep a marker: it is removed at the end of this method
output.push :empty
src.ignore_line
when :ial
m = InlineAttributeList.match src.shift_line
content = m[1] || ""
# puts "Content: #{content.inspect}"
src2 = CharSource.new(content, src)
interpret_extension(src2, output, [nil])
when :ald
output.push read_ald(src)
when :text
# A text line may start a table, a setext header, a definition list or
# a plain paragraph, depending on the lines that follow.
if src.cur_line =~ MightBeTableHeader and
(src.next_line && src.next_line =~ TableSeparator)
output.push read_table(src)
elsif [:header1,:header2].include? src.next_line.md_type
output.push read_header12(src)
elsif eventually_comes_a_def_list(src)
definition = read_definition(src)
# append to the current definition list if we have one
if output.last.kind_of?(MDElement) &&
output.last.node_type == :definition_list then
output.last.children << definition
else
output.push md_el(:definition_list, [definition])
end
else # Start of a paragraph
output.push read_paragraph(src)
end
when :header2, :hrule
# hrule
# (a :header2 line reaching this point is not underlining a text line,
# so it is treated as a rule)
src.shift_line
output.push md_hrule()
when :header3
output.push read_header3(src)
when :ulist, :olist
list_type = src.cur_line.md_type == :ulist ? :ul : :ol
li = read_list_item(src)
# append to current list if we have one
if output.last.kind_of?(MDElement) &&
output.last.node_type == list_type then
output.last.children << li
else
output.push md_el(list_type, [li])
end
when :quote; output.push read_quote(src)
when :code; e = read_code(src); output << e if e
when :raw_html; e = read_raw_html(src); output << e if e
when :footnote_text; output.push read_footnote_text(src)
when :ref_definition; read_ref_definition(src, output)
when :abbreviation; output.push read_abbreviation(src)
when :xml_instr; read_xml_instruction(src, output)
when :metadata;
maruku_error "Please use the new meta-data syntax: \n"+
" http://maruku.rubyforge.org/proposal.html\n", src
src.ignore_line
else # warn if we forgot something
md_type = src.cur_line.md_type
line = src.cur_line
maruku_error "Ignoring line '#{line}' type = #{md_type}", src
src.shift_line
end
end
# Attach the inline attribute lists to their elements, then drop the
# :ial elements themselves.
merge_ial(output, src, output)
output.delete_if {|x| x.kind_of?(MDElement) &&
x.node_type == :ial}
# get rid of empty line markers
output.delete_if {|x| x == :empty}
# See for each list if we can omit the paragraphs and use li_span
# TODO: do this after
output.each do |c|
# Remove paragraphs that we can get rid of
if [:ul,:ol].include? c.node_type
# only when no item of the list wants its paragraph
if c.children.all? {|li| !li.want_my_paragraph} then
c.children.each do |d|
d.node_type = :li_span
# replace the item's content with the children of its first child
d.children = d.children[0].children
end
end
end
if c.node_type == :definition_list
if c.children.all?{|defi| !defi.want_my_paragraph} then
c.children.each do |definition|
definition.definitions.each do |dd|
dd.children = dd.children[0].children
end
end
end
end
end
output
end
# Reads an attribute list definition line.
#
# The line is consumed and matched against AttributeDefinitionList; its
# first capture is the id, the second the attribute list source, which is
# parsed with read_attribute_list. The result is stored in self.ald[id]
# and returned wrapped in an :ald element. A line that does not match is
# reported through maruku_error and nil is returned.
def read_ald(src)
  line = src.shift_line
  unless line =~ AttributeDefinitionList
    maruku_error "Bug Bug:\n#{line.inspect}"
    return nil
  end
  id, raw_list = $1, $2
  # positional arguments of read_attribute_list: context, break_on
  list = read_attribute_list(CharSource.new(raw_list, src), nil, [nil])
  self.ald[id] = list
  md_ald(id, list)
end
# Reads a header underlined with ----- or ========.
#
# Consumes the title line and the underline. With the new meta-data syntax
# enabled, a trailing "{...}" on the title is parsed as an inline attribute
# list. The level is 2 when the underline's type is :header2, otherwise 1.
def read_header12(src)
  title = src.shift_line.strip
  al = nil
  # Check if there is an IAL
  if new_meta_data? and (m = /^(.*)\{(.*)\}\s*$/.match(title))
    title = m[1].strip
    al = read_attribute_list(CharSource.new(m[2], src), nil, [nil])
  end
  text = parse_lines_as_span [ title ]
  level = src.cur_line.md_type == :header2 ? 2 : 1
  src.shift_line
  md_header(level, text, al)
end
# Reads a header like '#### header ####'.
#
# Consumes one line. With the new meta-data syntax enabled, a trailing
# "{...}" is parsed as an inline attribute list. The level is the number of
# leading hashes; the surrounding hashes are stripped from the title.
def read_header3(src)
  title = src.shift_line.strip
  al = nil
  # Check if there is an IAL
  if new_meta_data? and (m = /^(.*)\{(.*)\}\s*$/.match(title))
    title = m[1].strip
    al = read_attribute_list(CharSource.new(m[2], src), nil, [nil])
  end
  level = num_leading_hashes(title)
  text = parse_lines_as_span [strip_hashes(title)]
  md_header(level, text, al)
end
# Reads an XML processing instruction (<?target code ... ?>), which may
# span several lines, and pushes the result onto +output+.
#
# Lines are consumed until one contains "?>"; anything following the "?>"
# on that line is reported through maruku_error. For the target 'mrk',
# and only when MaRuKu::Globals[:unsafe_features] is set, the code is run
# through safe_execute_code and the elements it returns are pushed;
# every other instruction is kept as an :xml_instr element.
#
# NOTE(review): the 'mrk' branch executes code taken from the document --
# keep :unsafe_features off for untrusted input.
def read_xml_instruction(src, output)
  header = /^\s*<\?((\w+)\s*)?(.*)$/.match(src.shift_line)
  raise "BugBug" unless header
  target = header[2] || ''
  code = header[3]
  code += "\n"+src.shift_line until code =~ /\?>/
  unless code =~ /\?>\s*$/
    garbage = /\?>(.*)$/.match(code)[1]
    maruku_error "Trailing garbage on last line: #{garbage.inspect}:\n"+
      add_tabs(code, 1, '|'), src
  end
  code = code.gsub(/\?>\s*$/, '')

  unless target == 'mrk' && MaRuKu::Globals[:unsafe_features]
    return output.push(md_xml_instr(target, code))
  end

  result = safe_execute_code(self, code)
  return nil unless result
  raise "Not expected" if result.kind_of? String
  output.push(*result)
end
# Reads a block of raw HTML.
#
# Lines are fed to an HTMLHelper (joined with newlines) until it reports
# that it is finished or the source runs out. Any exception raised while
# reading is reported through maruku_error; whatever the helper read up to
# that point is still returned as a :raw_html element.
def read_raw_html(src)
  helper = HTMLHelper.new
  begin
    helper.eat_this(src.shift_line)
    while src.cur_line && !helper.is_finished?
      helper.eat_this("\n" + src.shift_line)
    end
  rescue Exception => e
    # HTMLHelper#error raises Exception itself, hence the broad rescue
    ex = e.inspect + e.backtrace.join("\n")
    maruku_error "Bad block-level HTML:\n#{add_tabs(ex,1,'|')}\n", src
  end
  md_html(helper.stuff_you_read)
end
# Reads a paragraph: collects lines until something ends it, then parses
# the collected lines as span-level content.
#
# The paragraph stops before:
# * a quote, '#' header, empty line, raw HTML, reference definition,
#   inline attribute list or XML instruction;
# * a list item, but only when the following line is a list item of the
#   same kind (a single list-looking line does not break the paragraph);
# * a blank line;
# * a line whose following line underlines it as a header;
# * a line matched by a registered block extension.
def read_paragraph(src)
  hard_breaks = [:quote, :header3, :empty, :raw_html, :ref_definition, :ial, :xml_instr]
  collected = []
  while src.cur_line
    kind = src.cur_line.md_type
    break if hard_breaks.include?(kind)
    break if [:olist, :ulist].include?(kind) && src.next_line.md_type == kind
    break if src.cur_line.strip.size == 0
    break if [:header1, :header2].include?(src.next_line.md_type)
    break if any_matching_block_extension?(src.cur_line)
    collected << src.shift_line
  end
  # dbg_describe_ary(lines, 'PAR')
  md_par(parse_lines_as_span(collected, src))
end
# Reads one list item, either ordered or unordered.
#
# The first line gives the indentation (spaces_before_first_char); the
# following lines belonging to the item are gathered by
# read_indented_content. The item's lines are then parsed as blocks of
# their own. The item keeps its paragraph when read_indented_content says
# so or when it contains more than one block.
def read_list_item(src)
  parent_offset = src.cur_index
  item_type = src.cur_line.md_type
  first_line = src.shift_line
  # Ugly things going on inside `read_indented_content`
  indentation = spaces_before_first_char(first_line)
  body, want_my_paragraph =
    read_indented_content(src, indentation, [:ulist, :olist, :ial], item_type)
  # add first line
  # Strip first '*', '-', '+' from first line
  body.unshift first_line[indentation, first_line.size - 1]
  #dbg_describe_ary(lines, 'LIST ITEM ')
  children = parse_blocks(LineSource.new(body, src, parent_offset))
  md_li(children, want_my_paragraph || (children.size > 1))
end
# Reads an abbreviation definition line.
#
# The line is consumed and matched against Abbreviation: the first capture
# is the abbreviation, the second its description. The pair is recorded in
# self.abbreviations and returned as an :abbr_def element. A line that
# does not match, or an empty abbreviation, is reported through
# maruku_error (processing continues).
def read_abbreviation(src)
  line = src.shift_line
  unless line =~ Abbreviation
    maruku_error "Bug: it's Andrea's fault. Tell him.\n#{line.inspect}"
  end
  abbr, desc = $1, $2
  if !abbr || abbr.size == 0
    maruku_error "Bad abbrev. abbr=#{abbr.inspect} desc=#{desc.inspect}"
  end
  self.abbreviations[abbr] = desc
  md_abbr_def(abbr, desc)
end
# Reads the text of a footnote.
#
# The first line is matched against FootnoteText (first capture: the
# footnote id, second: the text on that line). Continuation lines are
# gathered by read_indented_content with an indentation of 4, and the
# whole is parsed as blocks. The resulting :footnote element is recorded
# in self.footnotes under its id and returned.
def read_footnote_text(src)
  parent_offset = src.cur_index
  first_line = src.shift_line
  unless first_line =~ FootnoteText
    maruku_error "Bug (it's Andrea's fault)"
  end
  id, text = $1, $2
  # Ugly things going on inside `read_indented_content`
  indentation = 4 #first.size-text.size
  body, want_my_paragraph =
    read_indented_content(src, indentation, [:footnote_text], :footnote_text)
  # add first line
  body.unshift text if text && text.strip != ""
  # dbg_describe_ary(lines, 'FOOTNOTE')
  children = parse_blocks(LineSource.new(body, src, parent_offset))
  footnote = md_footnote(id, children)
  self.footnotes[id] = footnote
  footnote
end
# This is the only ugly function in the code base.
# It is used to read list items, descriptions, footnote text
#
# Collects from +src+ the lines that continue the current item.
#
# indentation:: number of leading spaces stripped from accepted lines, and
#               required of a line that follows an empty one
# break_list:: line types that end the item when no empty line has been
#              seen yet
# item_type:: line type of a sibling item (used to decide about paragraphs)
#
# Returns [lines, want_my_paragraph]. Trailing empty lines are dropped from
# +lines+. +want_my_paragraph+ is true when some content followed an empty
# line, or when an empty line is followed by a sibling item.
def read_indented_content(src, indentation, break_list, item_type)
lines =[]
# collect all indented lines
saw_empty = false; saw_anything_after = false
while src.cur_line
#puts "#{src.cur_line.md_type} #{src.cur_line.inspect}"
# empty lines are always accepted (trailing ones are removed below)
if src.cur_line.md_type == :empty
saw_empty = true
lines << src.shift_line
next
end
# after a white line
if saw_empty
# we expect things to be properly aligned
if (ns=number_of_leading_spaces(src.cur_line)) < indentation
#puts "breaking for spaces, only #{ns}: #{src.cur_line}"
break
end
saw_anything_after = true
else
# before any empty line only the listed line types end the item
break if break_list.include? src.cur_line.md_type
# break if src.cur_line.md_type != :text
end
stripped = strip_indent(src.shift_line, indentation)
lines << stripped
#puts "Accepted as #{stripped.inspect}"
# You are only required to indent the first line of
# a child paragraph.
if stripped.md_type == :text
while src.cur_line && (src.cur_line.md_type == :text)
lines << strip_indent(src.shift_line, indentation)
end
end
end
want_my_paragraph = saw_anything_after ||
(saw_empty && (src.cur_line && (src.cur_line.md_type == item_type)))
# dbg_describe_ary(lines, 'LI')
# create a new context
# (drop the empty lines left at the end)
while lines.last && (lines.last.md_type == :empty)
lines.pop
end
return lines, want_my_paragraph
end
# Reads a block quote: consumes the consecutive :quote lines, removes the
# quote marker from each with unquote, parses the result as blocks and
# wraps them in a :quote element.
def read_quote(src)
  parent_offset = src.cur_index
  quoted = []
  # collect all indented lines
  quoted << unquote(src.shift_line) while src.cur_line && src.cur_line.md_type == :quote
  # dbg_describe_ary(lines, 'QUOTE')
  md_quote(parse_blocks(LineSource.new(quoted, src, parent_offset)))
end
# Reads an indented code block: consumes the consecutive :code and :empty
# lines, removing 4 columns of indentation from each, drops the blank lines
# at both ends and returns a :code element holding the lines joined with
# newlines. Returns nil when nothing but blank lines was read.
def read_code(src)
  # collect all indented lines
  body = []
  while src.cur_line && [:code, :empty].include?(src.cur_line.md_type)
    body << strip_indent(src.shift_line, 4)
  end
  #while lines.last && (lines.last.md_type == :empty )
  body.pop while body.last && body.last.strip.size == 0
  body.shift while body.first && body.first.strip.size == 0
  return nil if body.empty?
  # dbg_describe_ary(lines, 'CODE')
  md_codeblock(body.join("\n"))
end
# Reads a series of metadata lines with empty lines in between.
# Empty lines are skipped, every metadata line is parsed with
# parse_metadata and merged into the returned hash; reading stops at the
# first line of any other type.
def read_metadata(src)
  collected = {}
  while src.cur_line
    kind = src.cur_line.md_type
    if kind == :empty
      src.shift_line
    elsif kind == :metadata
      collected.merge! parse_metadata(src.shift_line)
    else
      break
    end
  end
  collected
end
# Reads a link reference definition and registers it.
#
# The definition may continue on a second line indented by 1 to 3 spaces.
# The joined text is matched against LinkRegex (captures: id, url, title,
# extra attributes). The id is normalized (stripped, downcased, spaces
# turned into underscores) and {:url, :title} plus any key=value extras
# are stored in self.refs[id]. A :ref_definition element is pushed on
# +out+. Text that does not match is reported through maruku_error and
# nothing is registered.
def read_ref_definition(src, out)
  text = src.shift_line
  # if link is incomplete, shift next line
  continuation = src.cur_line &&
    (src.cur_line.md_type != :ref_definition) &&
    [1,2,3].include?(number_of_leading_spaces(src.cur_line))
  text += " "+ src.shift_line if continuation

  match = LinkRegex.match(text)
  unless match
    maruku_error "Link does not respect format: '#{text}'"
    return
  end

  url = match[2]
  title = match[3]
  id = match[1].strip.downcase.gsub(' ','_')
  entry = self.refs[id] = {:url=>url,:title=>title}

  # extra attributes, written as key=value or key="value"
  (match[4] || "").split.each do |couple|
    key, value = couple.split('=')
    value ||= ""
    value = value[1, value.size-2] if value[0,1]=='"'
    entry[key.to_sym] = value
  end

  # the hash is passed as the third positional argument of md_ref_def
  out.push md_ref_def(id, url, {:title=>title})
end
# Reads a table: a header line, a separator line giving the column
# alignments, then every following line that contains a '|'.
#
# Returns a :table element whose children are the head cells followed by
# the body cells (row after row) and whose :align meta is the list of
# column alignments. When the header, or any row, does not have as many
# cells as the separator has columns, the problem is reported through
# maruku_error / tell_user and a line break element is returned instead
# of the table.
def read_table(src)
  # Splits a line on '|' into its stripped, non-empty cells.
  def split_cells(s)
    s.strip.split('|').select{|x|x.strip.size>0}.map{|x|x.strip}
  end
  head = split_cells(src.shift_line).map{|s| md_el(:head_cell, parse_lines_as_span([s])) }
  separator=split_cells(src.shift_line)
  # $1 / $2 are the leading / trailing capture of Sep
  align = separator.map { |s| s =~ Sep
    if $1 and $2 then :center elsif $2 then :right else :left end }
  num_columns = align.size
  if head.size != num_columns
    maruku_error "Table head does not have #{num_columns} columns: \n#{head.inspect}"
    tell_user "I will ignore this table."
    # XXX try to recover
    return md_br()
  end
  rows = []
  while src.cur_line && src.cur_line =~ /\|/
    row = split_cells(src.shift_line).map{|s|
      md_el(:cell, parse_lines_as_span([s]))}
    # Check the row itself: the head was already validated above, so
    # testing head.size here could never detect a ragged row.
    if row.size != num_columns
      maruku_error "Row does not have #{num_columns} columns: \n#{row.inspect}"
      tell_user "I will ignore this table."
      # XXX try to recover
      return md_br()
    end
    rows << row
  end
  children = (head+rows).flatten
  return md_el(:table, children, {:align => align})
end
# If current line is text, a definition list is coming
# if 1) text,empty,[text,empty]*,definition
#
# The decision is made on the string returned by
# src.tell_me_the_future, which must start with one or more "t",
# an optional "e" and then a "d" (presumably one letter per upcoming
# line type -- confirm against tell_me_the_future).
#
# Returns the match position (0) when it does, nil otherwise.
def eventually_comes_a_def_list(src)
  upcoming = src.tell_me_the_future
  # puts "future: #{upcoming}"
  upcoming =~ /^t+e?d/x
end
# Reads one definition-list entry: one or more term lines followed by
# one or more definitions.
#
# src:: line source positioned on the first term (:text) line.
#
# Returns md_el(:definition, terms+definitions, ...) whose meta hash
# carries the terms, the definitions and the :want_my_paragraph flag.
# The flag is set when an empty line separates the terms from the first
# definition, or when read_indented_content reports it for any
# definition body.
#
# Raises RuntimeError ("Chunky Bacon!") when the source ends right after
# the terms, or when the line following the terms (and the optional
# empty line) is not a :definition.
def read_definition(src)
# Read one or more terms
terms = []
while src.cur_line && src.cur_line.md_type == :text
terms << md_el(:definition_term, parse_lines_as_span([src.shift_line]))
end
# dbg_describe_ary(terms, 'DT')
want_my_paragraph = false
raise "Chunky Bacon!" if not src.cur_line
# one optional empty
if src.cur_line.md_type == :empty
want_my_paragraph = true
src.shift_line
end
# If the source ended with that empty line, cur_line is nil here; the
# check below then relies on nil responding to md_type.
raise "Chunky Bacon!" if src.cur_line.md_type != :definition
# Read one or more definitions
definitions = []
while src.cur_line && src.cur_line.md_type == :definition
parent_offset = src.cur_index
first = src.shift_line
# Keep only the text captured by the Definition regexp ($1) as the
# first line of the definition body.
first =~ Definition
first = $1
# I know, it's ugly!!!
lines, w_m_p =
read_indented_content(src,4, [:definition], :definition)
want_my_paragraph ||= w_m_p
lines.unshift first
# dbg_describe_ary(lines, 'DD')
# Parse the definition body as nested block-level content.
src2 = LineSource.new(lines, src, parent_offset)
children = parse_blocks(src2)
definitions << md_el(:definition_data, children)
end
return md_el(:definition, terms+definitions, {
:terms => terms,
:definitions => definitions,
:want_my_paragraph => want_my_paragraph})
end
end # BlockLevelParser
end # MaRuKu
end
end

View file

@ -0,0 +1,226 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'iconv'
module MaRuKu; module In; module Markdown; module BlockLevelParser
# Parses a whole document and populates this object with the result.
#
# s:: the raw document text, optionally starting with email-style
#     headers (handled by parse_email_headers).
#
# Steps, in order:
# 1. split headers from body; headers are merged into self.attributes
# 2. optionally transcode the body to UTF-8 (see `encoding` below)
# 3. parse the body into @children
# 4. expand abbreviations and markdown inside raw HTML
# 5. build the table of contents and default the :title attribute
# 6. expand attribute lists on every element
# 7. optionally execute embedded code (see `unsafe_features` below)
def parse_doc(s)
# parse_email_headers returns the headers plus the remaining body
# under the :data key; the body is taken out before merging.
meta2 = parse_email_headers(s)
data = meta2[:data]
meta2.delete :data
self.attributes.merge! meta2
=begin maruku_doc
Attribute: encoding
Scope: document
Summary: Encoding for the document.
If the `encoding` attribute is specified, then the content
will be converted from the specified encoding to UTF-8.
Conversion happens using the `iconv` library.
=end
enc = self.attributes[:encoding]
self.attributes.delete :encoding
if enc && enc.downcase != 'utf-8'
converted = Iconv.new('utf-8', enc).iconv(data)
# puts "Data: #{data.inspect}: #{data}"
# puts "Conv: #{converted.inspect}: #{converted}"
data = converted
end
@children = parse_text_as_markdown(data)
# The markdown_extra? switch is currently hard-wired on.
if true #markdown_extra?
self.search_abbreviations
self.substitute_markdown_inside_raw_html
end
toc = create_toc
# use title if not set
if not self.attributes[:title] and toc.header_element
title = toc.header_element.to_s
self.attributes[:title] = title
# puts "Set document title to #{title}"
end
# save for later use
self.toc = toc
# Now do the attributes magic
each_element do |e|
# default attribute list
# (looked up in self.ald under the element's node type name)
if default = self.ald[e.node_type.to_s]
expand_attribute_list(default, e.attributes)
end
# the element's own attribute list is expanded after the default one
expand_attribute_list(e.al, e.attributes)
# puts "#{e.node_type}: #{e.attributes.inspect}"
end
=begin maruku_doc
Attribute: unsafe_features
Scope: global
Summary: Enables execution of XML instructions.
Disabled by default because of security concerns.
=end
if Maruku::Globals[:unsafe_features]
self.execute_code_blocks
# TODO: remove executed code blocks
end
end
# Expands an attribute list into a Hash.
#
# al::     enumerable of [key, value] pairs
# result:: Hash that receives the expanded attributes (modified in place)
#
# * :class values are accumulated, separated by a single space
# * :id overwrites any previous id
# * :ref names another attribute list in self.ald, which is expanded
#   recursively; the names visited are tracked under
#   :expanded_references so that a circular reference is reported
#   instead of recursing forever. A name that is not in self.ald is
#   appended to :unresolved_references and also set as a flag
#   (result[name.to_sym] = true).
# * any other key is stored under its symbolized name
#
# Returns +al+.
def expand_attribute_list(al, result)
  al.each do |key, value|
    if key == :class
      result[:class] = result[:class] ? result[:class] + (" " + value) : value
    elsif key == :id
      result[:id] = value
    elsif key == :ref
      if self.ald[value]
        visited = (result[:expanded_references] ||= [])
        seen_before = visited.include?(value)
        visited.push value
        if seen_before
          maruku_error "Circular reference between labels.\n\n"+
            "Label #{value.inspect} calls itself via recursion.\nThe recursion is "+
            (visited.map{|x| x.inspect}.join(' => '))
        else
          expand_attribute_list(self.ald[value], result)
        end
      else
        if result[:unresolved_references]
          result[:unresolved_references] << " #{value}"
        else
          result[:unresolved_references] = value
        end
        # $stderr.puts "Unresolved reference #{value.inspect} (avail: #{self.ald.keys.inspect})"
        result[value.to_sym] = true
      end
    else
      result[key.to_sym] = value
    end
  end
end
# Evaluates +code+ in the context of +object+ without letting a failure
# escape to the caller.
#
# object:: receiver for instance_eval
# code::   String of Ruby source
#
# Returns the value of the evaluated code, or nil when evaluation
# raised; in that case the code, the exception and its backtrace are
# reported through maruku_error.
#
# NOTE(review): instance_eval executes arbitrary Ruby with full
# privileges; this must only be reachable for trusted documents.
def safe_execute_code(object, code)
  begin
    return object.instance_eval(code)
  rescue Exception => e
    # Exception (not just StandardError) is rescued on purpose: bad
    # code raises SyntaxError, which is a ScriptError. A single clause
    # suffices -- additional clauses for its subclasses placed after it
    # could never be reached.
    #
    # The trace comes from Exception#backtrace; Kernel#caller is private
    # and cannot be called on the exception object.
    trace = e.backtrace || []
    maruku_error "Exception while executing this:\n"+
      add_tabs(code, 1, ">")+
      "\nThe error was:\n"+
      add_tabs(e.inspect+"\n"+trace.join("\n"), 1, "|")
  end
  nil
end
# Runs the code of every :xml_instr element whose target is 'maruku'.
#
# The code is evaluated with safe_execute_code in the context of the
# element itself; a String result is echoed to standard output.
def execute_code_blocks
  self.each_element(:xml_instr) do |instr|
    next unless instr.target == 'maruku'
    outcome = safe_execute_code(instr, instr.code)
    puts "Result is : #{outcome.inspect}" if outcome.kind_of?(String)
  end
end
# Replaces occurrences of every known abbreviation with an abbreviation
# element.
#
# For each (abbreviation, title) pair in self.abbreviations, every
# string visited by replace_each_string that contains the abbreviation
# literally is replaced by [text before, md_abbr element, text after],
# splitting at the first occurrence; other strings are handed back
# unchanged.
def search_abbreviations
  self.abbreviations.each do |short_form, expansion|
    pattern = Regexp.new(Regexp.escape(short_form))
    self.replace_each_string do |text|
      hit = pattern.match(text)
      next text unless hit
      abbr = md_abbr(short_form.dup, expansion ? expansion.dup : nil)
      [hit.pre_match, abbr, hit.post_match]
    end
  end
end
include REXML
# (PHP Markdown extra) Search for elements that have
# markdown=1 or markdown=block defined
#
# For every :raw_html element that carries a parsed REXML document
# (its @parsed_html instance variable), each HTML element with a
# `markdown` attribute has that attribute removed and its non-blank
# text nodes replaced by the HTML rendering of their Markdown content.
# The text is parsed at block level when the attribute value is
# 'block' or the tag is listed in block_tags, at span level otherwise.
def substitute_markdown_inside_raw_html
self.each_element(:raw_html) do |e|
doc = e.instance_variable_get :@parsed_html
if doc # valid html
# parse block-level markdown elements in these HTML tags
block_tags = ['div']
# use xpath to find elements with 'markdown' attribute
# NOTE(review): the block parameter below reuses the name `e` of the
# outer block's parameter.
XPath.match(doc, "//*[attribute::markdown]" ).each do |e|
# puts "Found #{e}"
# should we parse block-level or span-level?
parse_blocks = (e.attributes['markdown'] == 'block') ||
block_tags.include?(e.name)
# remove 'markdown' attribute
e.delete_attribute 'markdown'
# Select all text elements of e
# NOTE(review): "//text()" is an absolute XPath, so this presumably
# selects every text node of the document rather than only those
# below e -- confirm whether ".//text()" was intended.
XPath.match(e, "//text()" ).each { |original_text|
s = original_text.value.strip
if s.size > 0
# :dummy is only a container used to render the parsed children
el = md_el(:dummy,
parse_blocks ? parse_text_as_markdown(s) :
parse_lines_as_span([s]) )
p = original_text.parent
# insert the rendered nodes where the text was, then drop the text
el.children_to_html.each do |x|
p.insert_before(original_text, x)
end
p.delete(original_text)
end
}
end
end
end
end
end end end end

View file

@ -0,0 +1,704 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'set'
module MaRuKu; module In; module Markdown; module SpanLevelParser
include MaRuKu::Helpers
# Characters that a backslash escapes in ordinary span text
# (passed as the `escaped` set to read_span).
EscapedCharInText =
Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>]
# Same as above plus the two quote characters; used by read_quoted.
EscapedCharInQuotes =
Set.new [?\\,?`,?*,?_,?{,?},?[,?],?(,?),?#,?.,?!,?|,?:,?+,?-,?>,?',?"]
# Backslash and backtick only (a plain Array, unlike the two Sets above).
EscapedCharInInlineCode = [?\\,?`]
# Parses several lines as one run of span-level content.
#
# lines::  array of strings; they are joined with "\n" before parsing
# parent:: passed through to parse_span_better
#
# Returns whatever parse_span_better returns.
def parse_lines_as_span(lines, parent=nil)
  joined = lines.join("\n")
  parse_span_better(joined, parent)
end
# Parses a string as span-level content.
#
# string:: the text to parse; anything that is not a String is reported
#          through +error+
# parent:: handed to the CharSource that wraps the text
#
# The text is copied and the copy frozen before it is read, so the
# caller's string is neither modified nor frozen.
#
# Returns the result of read_span run to the end of the text.
def parse_span_better(string, parent=nil)
  error "Passed #{string.class}." unless string.kind_of?(String)

  frozen_copy = (string + "").freeze
  source = CharSource.new(frozen_copy, parent)
  read_span(source, EscapedCharInText, [nil])
end
# This is the main loop for reading span elements
#
# It's long, but not *complex* or difficult to understand.
#
# src::             character source, read through cur_char, next_char,
#                   cur_chars, shift_char, ignore_char(s), ...
# escaped::         collection of characters that a backslash escapes
# exit_on_chars::   stop, without consuming, when the current character
#                   is in this collection (nil disables the check)
# exit_on_strings:: stop when src.cur_chars_are one of these strings
#                   (nil disables the check)
#
# Returns the collected elements (strings and span elements) after
# attribute lists have been merged, one leading and one trailing space
# removed, and educate() applied.
def read_span(src, escaped, exit_on_chars, exit_on_strings=nil)
con = SpanContext.new
c = d = nil
while true
c = src.cur_char
# This is only an optimization which cuts 50% of the time used.
# (but you can't use a-zA-z in exit_on_chars)
if c && ((c>=?a && c<=?z) || ((c>=?A && c<=?Z)))
con.cur_string << src.shift_char
next
end
break if exit_on_chars && exit_on_chars.include?(c)
break if exit_on_strings && exit_on_strings.any? {|x| src.cur_chars_are x}
# check if there are extensions
if check_span_extensions(src, con)
next
end
case c = src.cur_char
when ?\ # it's space (32)
# NOTE(review): the string tested below and the number of characters
# skipped do not agree as shown here; whitespace inside the literal
# was probably lost in this copy -- verify against the upstream file.
if src.cur_chars_are " \n"
src.ignore_chars(3)
con.push_element md_br()
next
else
src.ignore_char
con.push_space
end
when ?\n, ?\t
src.ignore_char
con.push_space
when ?`
read_inline_code(src,con)
when ?<
# It could be:
# 1) HTML "<div ..."
# 2) HTML "<!-- ..."
# 3) url "<http:// ", "<ftp:// ..."
# 4) email "<andrea@... ", "<mailto:andrea@..."
# 5) on itself! "a < b "
# 6) Start of <<guillemettes>>
case d = src.next_char
when ?<; # guillemettes
src.ignore_chars(2)
con.push_char ?<
con.push_char ?<
when ?!;
if src.cur_chars_are '<!--'
read_inline_html(src, con)
else
con.push_char src.shift_char
end
when ??
read_xml_instr_span(src, con)
when ?\ , ?\t
con.push_char src.shift_char
else
if src.next_matches(/<mailto:/) or
src.next_matches(/<[\w\.]+\@/)
read_email_el(src, con)
elsif src.next_matches(/<\w+:/)
read_url_el(src, con)
elsif src.next_matches(/<\w/)
#puts "This is HTML: #{src.cur_chars(20)}"
read_inline_html(src, con)
else
#puts "This is NOT HTML: #{src.cur_chars(20)}"
con.push_char src.shift_char
end
end
when ?\\
# Backslash: escaped quotes become entities, other characters from
# `escaped` are emitted literally, anything else keeps the backslash.
d = src.next_char
if d == ?'
src.ignore_chars(2)
con.push_element md_entity('apos')
elsif d == ?"
src.ignore_chars(2)
con.push_element md_entity('quot')
elsif escaped.include? d
src.ignore_chars(2)
con.push_char d
else
con.push_char src.shift_char
end
when ?[
if markdown_extra? && src.next_char == ?^
read_footnote_ref(src,con)
else
read_link(src, con)
end
when ?!
if src.next_char == ?[
read_image(src, con)
else
con.push_char src.shift_char
end
when ?&
if m = src.read_regexp(/\&([\w\d]+);/)
con.push_element md_entity(m[1])
else
con.push_char src.shift_char
end
when ?*
if not src.next_char
maruku_error "Opening * as last char.", src, con
maruku_recover "Threating as literal"
con.push_char src.shift_char
else
# Emphasis only opens when the run of markers is directly followed
# by something that is neither whitespace nor another marker.
follows = src.cur_chars(4)
if follows =~ /^\*\*\*[^\s\*]/
con.push_element read_emstrong(src,'***')
elsif follows =~ /^\*\*[^\s\*]/
con.push_element read_strong(src,'**')
elsif follows =~ /^\*[^\s\*]/
con.push_element read_em(src,'*')
else # * is just a normal char
con.push_char src.shift_char
end
end
when ?_
if not src.next_char
maruku_error "Opening _ as last char", src, con
maruku_recover "Threating as literal", src, con
con.push_char src.shift_char
else
follows = src.cur_chars(4)
if follows =~ /^\_\_\_[^\s\_]/
con.push_element read_emstrong(src,'___')
elsif follows =~ /^\_\_[^\s\_]/
con.push_element read_strong(src,'__')
elsif follows =~ /^\_[^\s\_]/
con.push_element read_em(src,'_')
else # _ is just a normal char
con.push_char src.shift_char
end
end
when ?{ # extension
src.ignore_char # {
interpret_extension(src, con, [?}])
src.ignore_char # }
when nil
# End of input reached before any of the exit conditions was met.
maruku_error ("Unclosed span (waiting for %s"+
"#{exit_on_strings.inspect})") % [
exit_on_chars ? "#{exit_on_chars.inspect} or" : ""],
src,con
break
else # normal text
con.push_char src.shift_char
end # end case
end # end while true
con.push_string_if_present
# Assign IAL to elements
merge_ial(con.elements, src, con)
# Remove leading space
if (s = con.elements.first).kind_of? String
if s[0] == ?\ then con.elements[0] = s[1, s.size-1] end
con.elements.shift if s.size == 0
end
# Remove final spaces
if (s = con.elements.last).kind_of? String
s.chop! if s[-1] == ?\
con.elements.pop if s.size == 0
end
educated = educate(con.elements)
educated
end
# Reads an inline XML processing instruction: <?target code... ?>
#
# Start: cursor on the opening "<?". Both delimiters are consumed.
# The target is the leading run of word characters ('' when absent);
# the code is everything up to "?>", stripped ("" when empty).
#
# Pushes an md_xml_instr(target, code) element onto +con+.
def read_xml_instr_span(src, con)
  src.ignore_chars(2) # starting <?

  name_match = src.read_regexp(/(\w+)/)
  target = name_match ? name_match[1] : ''

  terminator = "?>"
  body = read_simple(src, [], [], [terminator])
  src.ignore_chars(terminator.size)

  con.push_element md_xml_instr(target, (body || "").strip)
end
# Interprets the content of a "{...}" span extension.
#
# Start: cursor on character **after** '{'
# End: cursor on '}' or EOF
#
# A leading ':' is skipped and the rest read as meta-data; a leading
# '#' or '.' is read as meta-data directly. Anything else is currently
# unsupported: it is reported through maruku_recover and then handed
# to extension_meta as well.
def interpret_extension(src, con, break_on_chars)
case src.cur_char
when ?:
src.ignore_char # :
extension_meta(src, con, break_on_chars)
when ?#, ?.
extension_meta(src, con, break_on_chars)
else
# NOTE(review): read_simple has already consumed the text stored in
# `stuff` when extension_meta is called below, so extension_meta
# presumably starts reading after it -- confirm this is intended.
stuff = read_simple(src, escaped=[?}], break_on_chars, [])
if stuff =~ /^(\w+\s|[^\w])/
extension_id = $1.strip
# Placeholder for dispatching on extension_id: no extension is
# recognised yet, so the else branch always runs.
if false
else
maruku_recover "I don't know what to do with extension '#{extension_id}'\n"+
"I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
extension_meta(src, con, break_on_chars)
end
else
maruku_recover "I will threat this:\n\t{#{stuff}} \n as meta-data.\n", src, con
extension_meta(src, con, break_on_chars)
end
end
end
# Reads the meta-data part of a "{...}" extension.
#
# When the text starts with "name:" it is an attribute list
# *definition*: the list that follows is registered in self.doc.ald
# under that name and an md_ald element is pushed onto +con+.
#
# Otherwise it is an *inline* attribute list: the list is read and an
# md_ial element is pushed. An inline list has no name, so nothing is
# registered in self.doc.ald for it.
#
# src::            character source positioned at the start of the meta-data
# con::            span context receiving the element
# break_on_chars:: passed through to read_attribute_list
def extension_meta(src, con, break_on_chars)
  if m = src.read_regexp(/([^\s\:]+):/)
    name = m[1]
    al = read_attribute_list(src, con, break_on_chars)
    # puts "#{name}=#{al.inspect}"
    self.doc.ald[name] = al
    con.push md_ald(name, al)
  else
    al = read_attribute_list(src, con, break_on_chars)
    con.push md_ial(al)
  end
end
# Reads an automatic link of the form <scheme:...>.
#
# Start: cursor on the leading '<'. Both angle brackets are consumed.
# Pushes an md_url element for the text in between onto +con+.
def read_url_el(src, con)
  src.ignore_char # leading <
  target = read_simple(src, [], [?>])
  src.ignore_char # closing >

  con.push_element(md_url(target))
end
# Reads an automatic e-mail link: <user@host> or <mailto:user@host>.
#
# Start: cursor on the leading '<'. Both angle brackets are consumed.
# A "mailto:" prefix is removed before the md_email element is pushed
# onto +con+.
def read_email_el(src, con)
  src.ignore_char # leading <
  raw = read_simple(src, [], [?>])
  src.ignore_char # closing >

  con.push_element(md_email(raw.gsub(/^mailto:/, '')))
end
# Reads a url up to (not including) one of the +break_on+ characters.
#
# A url that starts with a quote character is reported through +error+.
# Enclosing angle brackets (<...>) are removed.
#
# Returns the url, or nil when it is empty.
def read_url(src, break_on)
  error 'Invalid char for url', src if [?', ?"].include?(src.cur_char)

  raw = read_simple(src, [], break_on) || "" # empty url
  raw = raw[1, raw.size - 2] if raw[0] == ?< && raw[-1] == ?>

  raw.size == 0 ? nil : raw
end
# Reads a value that may or may not be quoted.
#
# When the cursor is on ' or " the value is read with read_quoted;
# otherwise it is read with read_simple using +escaped+ and
# +exit_on_chars+.
def read_quoted_or_unquoted(src, con, escaped, exit_on_chars)
  if [?', ?"].include?(src.cur_char)
    read_quoted(src, con)
  else
    read_simple(src, escaped, exit_on_chars)
  end
end
# Tries to read a quoted value. If stream does not
# start with ' or ", returns nil (and consumes nothing).
#
# Otherwise the opening quote, the content up to the matching quote
# character and that closing quote are consumed; characters listed in
# EscapedCharInQuotes may be backslash-escaped inside the value.
#
# Returns the content as returned by read_simple.
def read_quoted(src, con)
  # puts "Asked to read quote from: #{src.cur_chars(10).inspect}"
  return nil unless [?', ?"].include?(src.cur_char)

  quote = src.shift_char # opening quote
  content = read_simple(src, EscapedCharInQuotes, [quote])
  src.ignore_char # closing quote
  content
end
# Reads a simple string (no formatting) until one of exit_on_chars or
# one of exit_on_strings is met, while un-escaping the characters in
# +escaped+ (a backslash before any other character is kept).
#
# The terminator itself is never consumed.
#
# If the string is empty, it returns nil.
# If the input ends before a terminator is found, the problem is
# reported through maruku_error / maruku_recover and what was read so
# far is returned.
def read_simple(src, escaped, exit_on_chars, exit_on_strings=nil)
  buffer = ""
  while true
    # puts "Reading simple #{buffer.inspect}"
    ch = src.cur_char
    break if exit_on_chars && exit_on_chars.include?(ch)
    break if exit_on_strings &&
      exit_on_strings.any? {|x| src.cur_chars_are x}

    if ch.nil?
      report = "String finished while reading (break on "+
        "#{exit_on_chars.map{|x|""<<x}.inspect})"+
        " already read: #{buffer.inspect}"
      maruku_error report, src
      maruku_recover "I boldly continue", src
      break
    elsif ch == ?\\
      following = src.next_char
      if escaped.include? following
        # drop the backslash, keep the escaped character
        src.ignore_chars(2)
        buffer << following
      else
        buffer << src.shift_char
      end
    else
      buffer << src.shift_char
    end
  end
  # puts "Read simple #{buffer.inspect}"
  buffer.empty? ? nil : buffer
end
# Reads emphasized text delimited by a single-character +delim+.
#
# Start: cursor on the opening delimiter. One character is skipped on
# each side of the span read up to +delim+.
#
# Returns md_em(children).
def read_em(src, delim)
  src.ignore_char # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_char # closing delimiter
  md_em(inner)
end
# Reads strong text delimited by a two-character +delim+.
#
# Start: cursor on the opening delimiter. Two characters are skipped
# on each side of the span read up to +delim+.
#
# Returns md_strong(children).
def read_strong(src, delim)
  src.ignore_chars(2) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(2) # closing delimiter
  md_strong(inner)
end
# Reads text that is both emphasized and strong, delimited by a
# three-character +delim+.
#
# Start: cursor on the opening delimiter. Three characters are skipped
# on each side of the span read up to +delim+.
#
# Returns md_emstrong(children).
def read_emstrong(src, delim)
  src.ignore_chars(3) # opening delimiter
  inner = read_span(src, EscapedCharInText, nil, [delim])
  src.ignore_chars(3) # closing delimiter
  md_emstrong(inner)
end
# The space character, written as a character literal.
SPACE = ?\ # = 32
# R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
# Matches the inside of a bracketed reference id up to and including
# the closing bracket: $1 = the id (no whitespace, no ']'),
# $2 = optional whitespace followed by ']'.
R_REF_ID = Regexp.compile(/([^\]\s]*)(\s*\])/)
# Reads a bracketed id "[refid]". Consumes also both brackets.
#
# Start: cursor on the opening '['. Whitespace after the bracket is
# skipped before the id is matched with R_REF_ID.
#
# Returns the id (possibly empty), or nil when no match is found.
def read_ref_id(src, con)
  src.ignore_char # [
  src.consume_whitespace
  # puts "Next: #{src.cur_chars(10).inspect}"
  found = src.read_regexp(R_REF_ID)
  # puts "Got: #{found[1].inspect} Ignored: #{found[2].inspect}"
  # puts "Then: #{src.cur_chars(10).inspect}"
  found ? found[1] : nil
end
# Reads a footnote reference (a bracketed id) and pushes the matching
# md_foot_ref element onto +con+.
def read_footnote_ref(src, con)
  con.push_element(md_foot_ref(read_ref_id(src, con)))
end
# Reads a piece of inline HTML starting at the cursor.
#
# The remaining buffer is fed to an HTMLHelper; the amount it consumed
# is the difference between the buffer size and what the helper left
# over. When something was consumed, an md_html element is pushed and
# that many characters are skipped; otherwise the current character is
# pushed as plain text.
#
# Any exception raised along the way (including the one that +error+
# may raise for malformed HTML) is reported through maruku_error and
# the current character is pushed as plain text instead.
def read_inline_html(src, con)
h = HTMLHelper.new
begin
# This is our current buffer in the context
start = src.current_remaining_buffer
h.eat_this start
if not h.is_finished?
error "inline_html: Malformed:\n "+
"#{start.inspect}\n #{h.inspect}",src,con
end
consumed = start.size - h.rest.size
if consumed > 0
con.push_element md_html(h.stuff_you_read)
src.ignore_chars(consumed)
else
puts "HTML helper did not work on #{start.inspect}"
con.push_char src.shift_char
end
# NOTE(review): rescuing Exception also catches interrupts and exit
# requests; presumably StandardError would be enough -- confirm.
rescue Exception => e
maruku_error "Bad html: \n" +
add_tabs(e.inspect+e.backtrace.join("\n"),1,'>'),
src,con
maruku_recover "I will try to continue after bad HTML.", src, con
con.push_char src.shift_char
end
end
# Reads an inline code span.
#
# Start: cursor on the first opening backtick. The span is delimited
# by the same number of backticks on each side; both delimiters are
# consumed. When more than one backtick is used, one space is removed
# from each end of the code so that the code itself can begin or end
# with a backtick.
#
# Pushes an md_code element onto +con+. An empty span (for instance
# backticks at the very end of the input) yields md_code("").
def read_inline_code(src, con)
  # Count the number of ticks
  num_ticks = 0
  while src.cur_char == ?`
    num_ticks += 1
    src.ignore_char
  end
  # We will read until this string
  end_string = "`" * num_ticks

  # read_simple returns nil, not "", when there is nothing to read;
  # normalize so that the indexing below cannot fail on nil.
  code = read_simple(src, [], [], [end_string]) || ""

  # puts "Now I expects #{num_ticks} ticks: #{src.cur_chars(10).inspect}"
  src.ignore_chars num_ticks

  # Ignore at most one space
  if num_ticks > 1 && code[0] == SPACE
    code = code[1, code.size - 1]
  end

  # drop last space
  if num_ticks > 1 && code[-1] == SPACE
    code = code[0, code.size - 1]
  end

  # puts "Read `` code: #{code.inspect}; after: #{src.cur_chars(10).inspect} "
  con.push_element md_code(code)
end
# Reads a link starting at the cursor.
#
# Start: cursor on the opening '['. Three forms are handled, decided
# by the character that follows the closing ']' (one space in between
# is tolerated when it is followed by '[' or '('):
#
# * "(" -- inline link: [text](url "title"); pushes md_im_link
# * "[" -- reference link: [text][id]; pushes md_link. An empty id
#   means the link text, lower-cased with spaces turned into
#   underscores, is the id.
# * anything else -- [text] alone; the link text is used as the id as
#   above.
#
# When an inline link is not closed by ')' or the reference id cannot
# be read, the problem is reported and the link text is pushed as
# plain elements instead of a link.
def read_link(src, con)
# we read the string and see what happens
src.ignore_char # opening bracket
children = read_span(src, EscapedCharInText, [?]])
src.ignore_char # closing bracket
# ignore space
if src.cur_char == SPACE and
(src.next_char == ?[ or src.next_char == ?( )
src.shift_char
end
case src.cur_char
when ?(
src.ignore_char # opening (
src.consume_whitespace
url = read_url(src, [SPACE,?\t,?)])
if not url
url = '' # no url is ok
end
src.consume_whitespace
title = nil
if src.cur_char != ?) # we have a title
# remember which quote character opened the title: it is needed to
# re-assemble a title that itself contains that quote (see below)
quote_char = src.cur_char
title = read_quoted(src,con)
if not title
maruku_error 'Must quote title',src,con
else
# Tries to read a title with quotes: ![a](url "ti"tle")
# this is the most ugly thing in Markdown
if not src.next_matches(/\s*\)/)
# if there is not a closing par ), then read
# the rest and guess it's title with quotes
rest = read_simple(src, escaped=[], break_on_chars=[?)],
break_on_strings=[])
# chop the closing char
# NOTE(review): read_simple returns nil when nothing is read, in which
# case this call would fail -- confirm whether that can happen here.
rest.chop!
title << quote_char << rest
end
end
end
src.consume_whitespace
closing = src.shift_char # closing )
if closing != ?)
maruku_error 'Unclosed link',src,con
maruku_recover "No closing ): I will not create"+
" the link for #{children.inspect}", src, con
con.push_elements children
return
end
con.push_element md_im_link(children,url, title)
when ?[ # link ref
ref_id = read_ref_id(src,con)
if ref_id
if ref_id.size == 0
ref_id = children.to_s.downcase.gsub(' ','_')
else
ref_id = ref_id.downcase
end
con.push_element md_link(children, ref_id)
else
maruku_error "Could not read ref_id", src, con
maruku_recover "I will not create the link for "+
"#{children.inspect}", src, con
con.push_elements children
return
end
else # empty [link]
id = children.to_s.downcase.gsub(' ','_')
con.push_element md_link(children, id)
end
end # read link
# Reads an image starting at the cursor.
#
# Start: cursor on the '!' of the opening "![". Three forms are
# handled, decided by the character that follows the closing ']' (one
# space in between is tolerated when it is followed by '[' or '('):
#
# * "(" -- inline image: ![alt](url "title"); pushes md_im_image.
#   A missing url or a missing closing ')' is reported through +error+.
# * "[" -- reference image: ![alt][id]; pushes md_image. An empty id
#   means the alt text, lower-cased with spaces turned into
#   underscores, is the id. When the id cannot be read the problem is
#   reported and the alt text is pushed as plain elements, the same
#   recovery read_link applies.
# * anything else -- ![alt] alone; the alt text is used as the id as
#   above.
def read_image(src, con)
  src.ignore_chars(2) # opening "!["
  alt_text = read_span(src, EscapedCharInText, [?]])
  src.ignore_char # closing bracket
  # ignore space
  if src.cur_char == SPACE and
     (src.next_char == ?[ or src.next_char == ?( )
    src.ignore_char
  end
  case src.cur_char
  when ?(
    src.ignore_char # opening (
    src.consume_whitespace
    url = read_url(src, [SPACE,?\t,?)])
    if not url
      error "Could not read url from #{src.cur_chars(10).inspect}",
        src,con
    end
    src.consume_whitespace
    title = nil
    if src.cur_char != ?) # we have a title
      # remember which quote opened the title, to re-assemble a title
      # that itself contains that quote (see below)
      quote_char = src.cur_char
      title = read_quoted(src,con)
      if not title
        maruku_error 'Must quote title',src,con
      else
        # Tries to read a title with quotes: ![a](url "ti"tle")
        # this is the most ugly thing in Markdown
        if not src.next_matches(/\s*\)/)
          # if there is not a closing par ), then read
          # the rest and guess it's title with quotes
          rest = read_simple(src, [], [?)], [])
          # chop the closing char
          rest.chop!
          title << quote_char << rest
        end
      end
    end
    src.consume_whitespace
    closing = src.shift_char # closing )
    if closing != ?)
      error(("Unclosed link: '"<<closing<<"'")+
        " Read url=#{url.inspect} title=#{title.inspect}",src,con)
    end
    con.push_element md_im_image(alt_text, url, title)
  when ?[ # link ref
    ref_id = read_ref_id(src,con)
    # read_ref_id returns nil when the bracketed id cannot be read;
    # recover the way read_link does instead of failing on nil.
    if not ref_id
      maruku_error "Could not read ref_id", src, con
      maruku_recover "I will not create the image for "+
        "#{alt_text.inspect}", src, con
      con.push_elements alt_text
      return
    end
    if ref_id.size == 0
      ref_id = alt_text.to_s.downcase.gsub(' ','_')
    else
      ref_id = ref_id.downcase
    end
    con.push_element md_image(alt_text, ref_id)
  else # no stuff
    ref_id = alt_text.to_s.downcase.gsub(' ','_')
    con.push_element md_image(alt_text, ref_id)
  end
end # read image
# Accumulator used while reading span-level content.
#
# Plain characters are buffered in +cur_string+; whenever an element is
# pushed, the buffered text (if any) is flushed into +elements+ first,
# so that +elements+ ends up as an ordered mix of Strings and
# MDElements.
class SpanContext
  include MaRuKu::Strings

  # Read elements
  attr_accessor :elements
  # Text read since the last element was pushed
  attr_accessor :cur_string

  def initialize
    @elements = []
    @cur_string = ""
  end

  # Flushes the pending text, then appends +e+.
  # Raises RuntimeError unless +e+ is a String or an MDElement.
  # Returns nil.
  def push_element(e)
    raise "Only MDElement and String, please. You pushed #{e.class}: #{e.inspect} " if
      not (e.kind_of?(String) or e.kind_of?(MDElement))

    push_string_if_present

    @elements << e
    nil
  end

  alias push push_element

  # Pushes every item of +a+: strings are appended to the pending text,
  # anything else goes through push_element.
  def push_elements(a)
    a.each do |e|
      if e.kind_of? String
        # Append the string as a whole. Re-appending it byte by byte
        # turns every byte of a multibyte character into a character of
        # its own on Ruby 1.9+, where String#<< treats an Integer as a
        # codepoint.
        @cur_string << e
      else
        push_element e
      end
    end
  end

  # Moves the pending text, if any, into +elements+. Returns nil.
  def push_string_if_present
    if @cur_string.size > 0
      @elements << @cur_string
      @cur_string = ""
    end
    nil
  end

  # Appends one character to the pending text. Returns nil.
  def push_char(c)
    @cur_string << c
    nil
  end

  # push space into current string if
  # there isn't one
  def push_space
    # ?\s is the space character; unlike a backslash followed by a
    # literal blank it does not depend on trailing whitespace
    # surviving in the source file.
    last = @cur_string[@cur_string.size - 1]
    @cur_string << ?\s if last != ?\s
  end

  # Human-readable dump of the elements read so far and of the pending
  # text, for error messages.
  def describe
    lines = @elements.map { |x| x.inspect }.join("\n")
    s = "Elements read in span: \n" +
      add_tabs(lines, 1, ' -') + "\n"

    if @cur_string.size > 0
      s += "Current string: \n #{@cur_string.inspect}\n"
    end
    s
  end
end # SpanContext
end end end end # module MaRuKu; module In; module Markdown; module SpanLevelParser

View file

@ -0,0 +1,225 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
#
# NOTA BENE:
#
# The following algorithm is a rip-off of RubyPants written by
# Christian Neukirchen.
#
# RubyPants is a Ruby port of SmartyPants written by John Gruber.
#
# This file is distributed under the GPL, which I guess is compatible
# with the terms of the RubyPants license.
#
# -- Andrea Censi
# = RubyPants -- SmartyPants ported to Ruby
#
# Ported by Christian Neukirchen <mailto:chneukirchen@gmail.com>
# Copyright (C) 2004 Christian Neukirchen
#
# Incorporates ideas, comments and documentation by Chad Miller
# Copyright (C) 2004 Chad Miller
#
# Original SmartyPants by John Gruber
# Copyright (C) 2003 John Gruber
#
#
# = RubyPants -- SmartyPants ported to Ruby
#
#
# [snip]
#
# == Authors
#
# John Gruber did all of the hard work of writing this software in
# Perl for Movable Type and almost all of this useful documentation.
# Chad Miller ported it to Python to use with Pyblosxom.
#
# Christian Neukirchen provided the Ruby port, as a general-purpose
# library that follows the *Cloth API.
#
#
# == Copyright and License
#
# === SmartyPants license:
#
# Copyright (c) 2003 John Gruber
# (http://daringfireball.net)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# * Neither the name "SmartyPants" nor the names of its contributors
# may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# This software is provided by the copyright holders and contributors
# "as is" and any express or implied warranties, including, but not
# limited to, the implied warranties of merchantability and fitness
# for a particular purpose are disclaimed. In no event shall the
# copyright owner or contributors be liable for any direct, indirect,
# incidental, special, exemplary, or consequential damages (including,
# but not limited to, procurement of substitute goods or services;
# loss of use, data, or profits; or business interruption) however
# caused and on any theory of liability, whether in contract, strict
# liability, or tort (including negligence or otherwise) arising in
# any way out of the use of this software, even if advised of the
# possibility of such damage.
#
# === RubyPants license
#
# RubyPants is a derivative work of SmartyPants and smartypants.py.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# This software is provided by the copyright holders and contributors
# "as is" and any express or implied warranties, including, but not
# limited to, the implied warranties of merchantability and fitness
# for a particular purpose are disclaimed. In no event shall the
# copyright owner or contributors be liable for any direct, indirect,
# incidental, special, exemplary, or consequential damages (including,
# but not limited to, procurement of substitute goods or services;
# loss of use, data, or profits; or business interruption) however
# caused and on any theory of liability, whether in contract, strict
# liability, or tort (including negligence or otherwise) arising in
# any way out of the use of this software, even if advised of the
# possibility of such damage.
#
#
# == Links
#
# John Gruber:: http://daringfireball.net
# SmartyPants:: http://daringfireball.net/projects/smartypants
#
# Chad Miller:: http://web.chad.org
#
# Christian Neukirchen:: http://kronavita.de/chris
module MaRuKu; module In; module Markdown; module SpanLevelParser
# Regexp source for a character class of ASCII punctuation; it is
# interpolated into the "very first character is a quote" rules below.
Punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'
# Regexp source for "any character after which a quote is a closing
# quote": anything except whitespace, '[', '{', '(' and '-'.
Close_class = %![^\ \t\r\n\\[\{\(\-]!
# Typographic substitution rules, applied in this order by educate.
#
# Each entry is [pattern, replacement]. The pattern is a Regexp or a
# literal String; the replacement is one entity name or a list of
# them, where :one stands for "whatever the first capture group
# matched". The map at the end normalizes every entry to
# [Regexp, Array].
Rules = [
[/---/, :mdash ],
[/--/, :ndash ],
['...', :hellip ],
['. . .', :hellip ],
["``", :ldquo ],
["''", :rdquo ],
[/<<\s/, [:laquo, :nbsp] ],
[/\s>>/, [:nbsp, :raquo] ],
[/<</, :laquo ],
[/>>/, :raquo ],
# def educate_single_backticks(str)
# ["`", :lsquo]
# ["'", :rsquo]
# Special case if the very first character is a quote followed by
# punctuation at a non-word-break. Close the quotes by brute
# force:
[/^'(?=#{Punct_class}\B)/, :rsquo],
[/^"(?=#{Punct_class}\B)/, :rdquo],
# Special case for double sets of quotes, e.g.:
# <p>He said, "'Quoted' words in a larger quote."</p>
[/"'(?=\w)/, [:ldquo, :lsquo] ],
[/'"(?=\w)/, [:lsquo, :ldquo] ],
# Special case for decade abbreviations (the '80s):
[/'(?=\d\ds)/, :rsquo ],
# Get most opening single quotes:
[/(\s)'(?=\w)/, [:one, :lsquo] ],
# Single closing quotes:
[/(#{Close_class})'/, [:one, :rsquo]],
[/'(\s|s\b|$)/, [:rsquo, :one]],
# Any remaining single quotes should be opening ones:
[/'/, :lsquo],
# Get most opening double quotes:
[/(\s)"(?=\w)/, [:one, :ldquo]],
# Double closing quotes:
[/(#{Close_class})"/, [:one, :rdquo]],
[/"(\s|s\b|$)/, [:rdquo, :one]],
# Any remaining quotes should be opening ones:
[/"/, :ldquo]
].
map{|reg, subst| # People should do the thinking, machines should do the work.
# literal String patterns are escaped and compiled
reg = Regexp.new(Regexp.escape(reg)) if not reg.kind_of? Regexp
# single replacements are wrapped in an Array
subst = [subst] if not subst.kind_of?Array
[reg, subst]}
# Applies one substitution rule to a list of elements.
#
# reg::   pattern to look for in the String items
# subst:: list of replacements for each match: :one stands for the
#         first capture group, any other symbol becomes
#         md_entity(symbol.to_s)
# input:: list of Strings and other elements
#
# note: input will be destroyed (it is consumed item by item)
#
# Every String containing a match is split around it. The replacements
# and the text after the match are put back on the front of the input,
# so the remaining text is scanned again for further matches. Items
# that are not Strings pass through untouched.
#
# Returns a new list.
def apply_one_rule(reg, subst, input)
  result = []
  while (item = input.shift)
    match = item.kind_of?(String) && reg.match(item)
    unless match
      result.push item
      next
    end

    before = match.pre_match
    after = match.post_match
    result.push before if before.size > 0
    input.unshift after if after.size > 0
    # unshift in reverse so that the replacements end up in order
    subst.reverse.each do |x|
      input.unshift(x == :one ? match[1] : md_entity(x.to_s))
    end
  end
  result
end
# Applies every rule in Rules, in order, to a list of span elements.
#
# elements:: list of Strings and other elements
#
# After the substitutions, empty strings are dropped and neighbouring
# strings are merged into one (the earlier string is appended to in
# place).
#
# Returns a new list.
def educate(elements)
  educated = Rules.inject(elements) do |current, (reg, subst)|
    apply_one_rule(reg, subst, current)
  end

  # strips empty strings
  educated.delete_if { |x| x.kind_of?(String) && x.size == 0 }

  # join consecutive strings
  educated.inject([]) do |merged, x|
    if x.kind_of?(String) && merged.last.kind_of?(String)
      merged.last << x
    else
      merged << x
    end
    merged
  end
end
end end end end

View file

@ -0,0 +1,141 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
class String
  include MaRuKu::Strings

  # Block-level type of this line, as computed by line_md_type.
  # The value is cached on the string after the first call.
  def md_type
    @md_type = line_md_type(self) unless @md_type
    @md_type
  end
end
class NilClass
  # A missing line has no type; lets callers ask for the md_type of
  # the current line without checking for nil first.
  def md_type
    nil
  end
end
# This code does the classification of lines for block-level parsing.
module MaRuKu; module Strings
# Classifies one source line for block-level parsing.
#
# l - the line to classify.
#
# Returns a Symbol naming the line type. The tests run top to bottom and the
# first one that matches wins; :text is returned when nothing matches.
def line_md_type(l)
# The order of evaluation is important (:text is a catch-all)
# Fast path: a line starting with an ASCII letter is always plain text.
return :text if l =~ /^[a-zA-Z]/
return :code if number_of_leading_spaces(l)>=4
return :empty if l =~ /^\s*$/
return :footnote_text if l =~ FootnoteText
return :ref_definition if l =~ LinkRegex or l=~ IncompleteLink
return :abbreviation if l =~ Abbreviation
return :definition if l =~ Definition
# I had a bug with emails and urls at the beginning of the
# line that were mistaken for raw_html
return :text if l=~ /^#{EMailAddress}/
return :text if l=~ /^<http:/
# raw html is like PHP Markdown Extra: at most three spaces before
return :xml_instr if l =~ %r{^\s*<\?}
return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?</?\s*\w+}
return :raw_html if l =~ %r{^[ ]?[ ]?[ ]?<\!\-\-}
return :ulist if l =~ /^\s?([\*\-\+])\s+.*\w+/
return :olist if l =~ /^\s?\d+\..*\w+/
return :header1 if l =~ /^(=)+/
# NOTE(review): a line made only of hyphens and whitespace matches here
# first, so the hyphen :hrule test below never fires for it -- confirm that
# the block parser turns such :header2 lines into rules where appropriate.
return :header2 if l =~ /^([-\s])+$/
return :header3 if l =~ /^(#)+\s*\S+/
# at least three asterisks on a line, and only whitespace
return :hrule if l =~ /^(\s*\*\s*){3,1000}$/
return :hrule if l =~ /^(\s*-\s*){3,1000}$/ # or hyphens
return :hrule if l =~ /^(\s*_\s*){3,1000}$/ # or underscores
return :quote if l =~ /^>/
return :metadata if l =~ /^@/
# if @@new_meta_data?
return :ald if l =~ AttributeDefinitionList
return :ial if l =~ InlineAttributeList
# end
# return :equation_end if l =~ EquationEnd
return :text # else, it's just text
end
# Attribute definition list line: up to three leading whitespace characters,
# an id in braces, a colon, then the attributes.
# $1 = id $2 = attribute list
AttributeDefinitionList = /^\s{0,3}\{([\w\d\s]+)\}:\s*(.*)\s*$/
# Inline attribute list line: up to three leading whitespace characters,
# then a braced group alone on the line. $1 = text between the braces
InlineAttributeList = /^\s{0,3}\{(.*)\}\s*$/
# Example:
# ^:blah blah
# ^: blah blah
# ^ : blah blah
Definition = %r{
^ # begin of line
[ ]{0,3} # up to 3 spaces
: # colon
\s* # whitespace
(\S.*) # the text = $1
$ # end of line
}x
# Example:
# *[HTML]: Hyper Text Markup Language
Abbreviation = %r{
^ # begin of line
\* # one asterisk
\[ # opening bracket
([^\]]+) # any non-closing bracket: id = $1
\] # closing bracket
: # colon
\s* # whitespace
(\S.*\S)* # definition=$2
\s* # strip this whitespace
$ # end of line
}x
FootnoteText = %r{
^\s*\[(\^.+)\]: # id = $1 (including '^')
\s*(\S.*)?$ # text = $2 (not obb.)
}x
# This regex is taken from BlueCloth sources
# Link defs are in the form: ^[id]: \n? url "optional title"
LinkRegex = %r{
^[ ]{0,3}\[([^\[\]]+)\]: # id = $1
[ ]*
<?(\S+)>? # url = $2
[ ]*
(?:# Titles are delimited by "quotes" or (parens).
["(']
(.+?) # title = $3
[")'] # Matching ) or "
\s*(.+)? # stuff = $4
)? # title is optional
}x
# A link definition with only the id part, "[id]:", and nothing after it.
# $1 = id
IncompleteLink = %r{^[ ]{0,3}\[([^\[\]]+)\]:\s*$}
# Header text followed by "{#some-id}". $1 = text, $2 = id
HeaderWithId = /^(.*)\{\#([\w_-]+)\}\s*$/
# Header text followed by a braced attribute group. $1 = text, $2 = attributes
HeaderWithAttributes = /^(.*)\{(.*)\}\s*$/
# if contains a pipe, it could be a table header
MightBeTableHeader = %r{\|}
# -------------:
# One column of a table separator row: dashes with an optional colon on
# either side. $1 = leading colon, $2 = trailing colon
Sep = /\s*(\:)?\s*-+\s*(\:)?\s*/
# | -------------:| ------------------------------ |
TableSeparator = %r{^(\|?#{Sep}\|?)+\s*$}
# An address in angle brackets: "<", text containing "@" and no colon, ">".
# $1 = the address without the brackets
EMailAddress = /<([^:]+@[^:]+)>/
end end

View file

@ -0,0 +1,33 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
# The Maruku class is the public interface
class Maruku
  # s    - optional source text; when given it is parsed immediately.
  # meta - Hash merged into the document attributes before parsing.
  def initialize(s=nil, meta={})
    super(nil)
    self.attributes.merge!(meta)
    parse_doc(s) if s
  end
end

View file

@ -0,0 +1,862 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'rexml/document'
class String
  # HTML rendering of a plain string: it is wrapped in a REXML::Text node,
  # and REXML takes care of all the encoding.
  def to_html
    REXML::Text.new self
  end
end
# Reopens REXML::Element to change the visibility of write_children, which
# to_html calls directly on the wrapper element.
# NOTE(review): this raises NameError if the installed REXML does not define
# Element#write_children -- confirm against the REXML version in use.
class REXML::Element
# We only want to output the children in Maruku::to_html
public :write_children
end
# This module groups all functions related to HTML export.
module MaRuKu; module Out; module HTML
include REXML
# Render as an HTML fragment (no head, just the content of BODY).
#
# context - optional Hash of rendering options:
#           :indent  - indentation handed to REXML (default -1)
#           :ie_hack - handed to REXML as its ie_hack flag (default true)
#
# Returns a String.
def to_html(context={})
  indent = context[:indent] || -1
  # `context[:ie_hack] || true` could never yield false; fall back to the
  # default only when the option is absent (nil).
  ie_hack = context[:ie_hack]
  ie_hack = true if ie_hack.nil?

  div = Element.new 'dummy'
  children_to_html.each do |e|
    div << e
  end

  # render footnotes
  if @doc.footnotes_order.size > 0
    div << render_footnotes
  end

  doc = Document.new(nil,{:respect_whitespace =>:all})
  doc << div

  # REXML Bug? if indent!=-1 whitespace is not respected for 'pre' elements
  # containing code.
  xml = ""
  # third argument is REXML's `transitive` flag
  div.write_children(xml, indent, true, ie_hack)
  xml
end
# Render to a complete HTML document (returns a string)
#
# context - optional Hash of rendering options:
#           :indent  - indentation handed to REXML (default -1)
#           :ie_hack - handed to REXML as its ie_hack flag (default true)
#
# The result is the XHTML 1.1 + MathML 2.0 + SVG 1.1 prolog followed by the
# serialised tree from to_html_document_tree.
def to_html_document(context={})
  indent = context[:indent] || -1
  # `context[:ie_hack] || true` could never yield false; fall back to the
  # default only when the option is absent (nil).
  ie_hack = context[:ie_hack]
  ie_hack = true if ie_hack.nil?

  doc = to_html_document_tree
  xml = ""
  # REXML Bug? if indent!=-1 whitespace is not respected for 'pre' elements
  # containing code.
  # third argument is REXML's `transitive` flag
  doc.write(xml, indent, true, ie_hack)

  xhtml11_mathml2_svg11 =
'<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC
"-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
"http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
'
  xhtml11_mathml2_svg11 + xml
end
# A fresh text node containing a single newline.
def xml_newline
  Text.new "\n"
end
=begin maruku_doc
Attribute: title
Scope: document
Sets the title of the document.
If a title is not specified, the first header will be used.
These should be equivalent:
Title: my document
Content
and
my document
===========
Content
In both cases, the title is set to "my document".
=end
=begin maruku_doc
Attribute: subject
Scope: document
Synonym for `title`.
=end
=begin maruku_doc
Attribute: css
Scope: document
Output: HTML
Summary: Activates CSS stylesheets for HTML.
`css` should be a space-separated list of urls.
Example:
CSS: style.css math.css
=end
# Render to a complete HTML document (returns a REXML document tree)
#
# Builds <html> with the XHTML default namespace and the `svg` prefix, a
# <head> holding the content-type <meta>, the <title> and one stylesheet
# <link> per entry of the :css attribute, and a <body> holding the rendered
# children, the footnotes (if any were referenced) and, when the
# :maruku_signature setting is on, the Maruku signature.
def to_html_document_tree
doc = Document.new(nil,{:respect_whitespace =>:all})
# doc << XMLDecl.new
root = Element.new('html', doc)
root.add_namespace('http://www.w3.org/1999/xhtml')
root.add_namespace('svg', "http://www.w3.org/2000/svg" )
lang = self.attributes[:lang] || 'en'
root.attributes['xml:lang'] = lang
root << xml_newline
head = Element.new 'head', root
#<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8">
me = Element.new 'meta', head
me.attributes['http-equiv'] = 'Content-type'
# me.attributes['content'] = 'text/html;charset=utf-8'
me.attributes['content'] = 'application/xhtml+xml;charset=utf-8'
# Create title element
# Falls back from :title to :subject to the empty string.
doc_title = self.attributes[:title] || self.attributes[:subject] || ""
title = Element.new 'title', head
title << Text.new(doc_title)
# :css is a whitespace-separated list of stylesheet urls
if css_list = self.attributes[:css]
css_list.split.each do |css|
# <link type="text/css" rel="stylesheet" href="..." />
link = Element.new 'link'
link.attributes['type'] = 'text/css'
link.attributes['rel'] = 'stylesheet'
link.attributes['href'] = css
head << link
head << xml_newline
end
end
root << xml_newline
body = Element.new 'body'
children_to_html.each do |e|
body << e
end
# render footnotes
if @doc.footnotes_order.size > 0
body << render_footnotes
end
# When we are rendering a whole document, we add a signature
# at the bottom.
if get_setting(:maruku_signature)
body << maruku_html_signature
end
root << body
doc
end
# returns "st","nd","rd" or "th" as appropriate
#
# day - day of the month as an Integer.
def day_suffix(day)
  suffixes = Hash.new('th')
  { 'st' => [1, 21, 31], 'nd' => [2, 22], 'rd' => [3, 23] }.each do |suffix, days|
    days.each { |d| suffixes[d] = suffix }
  end
  suffixes[day]
end
# formats a nice date
#
# Returns the current time as " at HH:MM on Weekday, Month DD<suffix>, YYYY".
def nice_date
  now = Time.now
  now.strftime(" at %H:%M on %A, %B %d") + day_suffix(now.day) + now.strftime(", %Y")
end
# Builds the "Created by Maruku at <date>." footer.
#
# Returns a DIV of class 'maruku_signature' holding an HR and a SPAN with
# the credit text and a link to the Maruku home page.
def maruku_html_signature
  signature = Element.new('div')
  signature.attributes['class'] = 'maruku_signature'
  Element.new('hr', signature)

  credit = Element.new('span', signature)
  credit.attributes['style'] = 'font-size: small; font-style: italic'
  credit << Text.new('Created by ')

  link = Element.new('a', credit)
  link.attributes['href'] = 'http://maruku.rubyforge.org'
  link.attributes['title'] = 'Maruku: a Markdown-superset interpreter for Ruby'
  link << Text.new('Maruku')

  credit << Text.new(nice_date+".")
  signature
end
# Builds the footnote section: a DIV of class 'footnotes' containing an HR
# and an OL with one LI per referenced footnote, in reference order. Each LI
# gets the id "fn:N" and a trailing back-link to "#fnref:N". Ids that have no
# footnote are reported through maruku_error and skipped.
def render_footnotes
  list = Element.new 'ol'
  @doc.footnotes_order.each.with_index(1) do |fid, num|
    note = self.footnotes[fid]
    unless note
      maruku_error"Could not find footnote '#{fid}'"
      next
    end
    li = note.wrap_as_element('li')
    li.attributes['id'] = "fn:#{num}"
    back = Element.new 'a'
    back.attributes['href'] = "#fnref:#{num}"
    back.attributes['rev'] = 'footnote'
    back << Text.new('&#8617;', true, nil, true)
    li.insert_after(li.children.last, back)
    list << li
  end

  div = Element.new 'div'
  div.attributes['class'] = 'footnotes'
  div << Element.new('hr')
  div << list
  div
end
# Horizontal rule: an HR element built through create_html_element.
def to_html_hrule
  create_html_element 'hr'
end

# Hard line break: a bare BR element.
def to_html_linebreak
  Element.new 'br'
end
# renders children as html and wraps into an element of given name
#
# name               - tag name of the wrapper.
# attributes_to_copy - extra attribute keys passed on to create_html_element.
#
# Returns the wrapper element.
def wrap_as_element(name, attributes_to_copy=[])
  wrapper = create_html_element(name, attributes_to_copy)
  children_to_html.each { |child| wrapper << child }
  # wrapper << Comment.new( "{"+self.al.to_md+"}") if not self.al.empty?
  # wrapper << Comment.new( @attributes.inspect) if not @attributes.empty?
  wrapper
end
=begin maruku_doc
Attribute: id
Scope: element
Output: LaTeX, HTML
It is copied as a standard HTML attribute.
Moreover, it is used as a label name for hyperlinks in both HTML and
in PDF.
=end
=begin maruku_doc
Attribute: class
Scope: element
Output: HTML
It is copied as a standard HTML attribute.
=end
=begin maruku_doc
Attribute: style
Scope: element
Output: HTML
It is copied as a standard HTML attribute.
=end
# Attributes copied onto every generated element.
StandardAttributes = [:id, :style, :class]

# Creates an element called +name+ and copies onto it each standard
# attribute, plus those listed in +attributes_to_copy+, that has a value in
# @attributes. Values are converted with to_s.
def create_html_element(name, attributes_to_copy=[])
  element = Element.new name
  wanted = StandardAttributes + attributes_to_copy
  wanted.each do |key|
    value = @attributes[key]
    element.attributes[key.to_s] = value.to_s if value
  end
  element
end
# Unordered list. A list carrying the :toc attribute is replaced by the
# document's table of contents.
def to_html_ul
  # render toc
  return @doc.toc.to_html if @attributes[:toc]
  add_ws wrap_as_element('ul')
end
# Block-level wrappers: the element is surrounded by newlines via add_ws.

def to_html_paragraph
  add_ws wrap_as_element('p')
end

def to_html_ol
  add_ws wrap_as_element('ol')
end

def to_html_li
  add_ws wrap_as_element('li')
end

def to_html_li_span
  add_ws wrap_as_element('li')
end

def to_html_quote
  add_ws wrap_as_element('blockquote')
end

# Inline wrappers: no surrounding whitespace is added.

def to_html_strong
  wrap_as_element('strong')
end

def to_html_emphasis
  wrap_as_element('em')
end
=begin maruku_doc
Attribute: use_numbered_headers
Scope: document
Summary: Activates the numbering of headers.
If `true`, section headers will be numbered.
In LaTeX export, the numbering of headers is managed
by Maruku, to have the same results in both HTML and LaTeX.
=end
# nil if not applicable, else string
#
# The string is the :section_number attribute joined with dots and followed
# by ". "; nil is returned when :use_numbered_headers is off or the
# attribute is missing or empty.
def section_number
  return nil unless get_setting(:use_numbered_headers)
  parts = @attributes[:section_number]
  return nil if !parts || parts.empty?
  parts.join('.')+". "
end
# nil if not applicable, else SPAN element
#
# The SPAN has class 'maruku_section_number' and contains the string
# returned by section_number, which is computed once and reused.
def render_section_number
  # if we are bound to a section, add section number
  num = section_number
  return nil unless num
  span = Element.new 'span'
  span.attributes['class'] = 'maruku_section_number'
  span << Text.new(num)
  span
end
# Header: an h<level> element; when a section number is available its SPAN
# is inserted in front of the first child.
def to_html_header
  heading = wrap_as_element "h#{self.level}"
  number = render_section_number
  heading.insert_before(heading.children.first, number) if number
  add_ws heading
end
# Escapes raw source for inclusion in HTML and returns it as a raw
# REXML::Text node (REXML will not escape it again).
def source2html(source)
  escaped = Text.normalize(source.gsub(/&/,'&amp;'))
  # IE bug: apostrophes must be written as &#39;
  escaped = escaped.gsub(/\&apos;/,'&#39;').gsub(/'/,'&#39;')
  Text.new(escaped, true, nil, true )
end
=begin maruku_doc
Attribute: html_use_syntax
Scope: global, document, element
Output: HTML
Summary: Enables the use of the `syntax` package.
Related: lang, code_lang
Default: <?mrk md_code(Globals[:html_use_syntax].to_s) ?>
If true, the `syntax` package is used. It supports the `ruby` and `xml`
languages. Remember to set the `lang` attribute of the code block.
Examples:
require 'maruku'
{:lang=ruby html_use_syntax=true}
and
<div style="text-align:center">Div</div>
{:lang=html html_use_syntax=true}
produces:
require 'maruku'
{:lang=ruby html_use_syntax=true}
and
<div style="text-align:center">Div</div>
{:lang=html html_use_syntax=true}
=end
# Renders a code block.
#
# When the :html_use_syntax setting is on and a language is known, the
# `syntax` package is used for highlighting; on any failure, and in every
# other case, the block falls back to to_html_code_using_pre. A style
# attribute with the background color is added when the
# :code_background_color setting differs from the global default.
# Returns the element wrapped by add_ws.
def to_html_code;
source = self.raw_code
# element-level :lang wins over the document-level :code_lang
lang = self.attributes[:lang] || @doc.attributes[:code_lang]
lang = 'xml' if lang=='html'
use_syntax = get_setting :html_use_syntax
element =
if use_syntax && lang
begin
# the syntax library is required lazily, once per process
if not $syntax_loaded
require 'rubygems'
require 'syntax'
require 'syntax/convertors/html'
$syntax_loaded = true
end
convertor = Syntax::Convertors::HTML.for_syntax lang
# eliminate trailing newlines otherwise Syntax crashes
source = source.gsub(/\n*\Z/,'')
html = convertor.convert( source )
html = html.gsub(/\&apos;/,'&#39;') # IE bug
html = html.gsub(/'/,'&#39;') # IE bug
# html = html.gsub(/&/,'&amp;')
# the converter output is re-parsed and its root renamed to CODE
code = Document.new(html, {:respect_whitespace =>:all}).root
code.name = 'code'
code.attributes['class'] = lang
code.attributes['lang'] = lang
pre = Element.new 'pre'
pre << code
pre
rescue LoadError => e
maruku_error "Could not load package 'syntax'.\n"+
"Please install it, for example using 'gem install syntax'."
to_html_code_using_pre(source)
# NOTE(review): `rescue Object` is as broad as a rescue can be -- confirm
# that swallowing non-StandardError exceptions here is intended.
rescue Object => e
maruku_error"Error while using the syntax library for code:\n#{source.inspect}"+
"Lang is #{lang} object is: \n"+
self.inspect +
"\nException: #{e.class}: #{e.message}\n\t#{e.backtrace.join("\n\t")}"
tell_user("Using normal PRE because the syntax library did not work.")
to_html_code_using_pre(source)
end
else
to_html_code_using_pre(source)
end
color = get_setting(:code_background_color)
if color != Globals[:code_background_color]
element.attributes['style'] = "background-color: #{color};"
end
add_ws element
end
=begin maruku_doc
Attribute: code_background_color
Scope: global, document, element
Summary: Background color for code blocks.
The format is either a named color (`green`, `red`) or a CSS color
of the form `#ff00ff`.
* for **HTML output**, the value is put straight in the `background-color` CSS
property of the block.
* for **LaTeX output**, if it is a named color, it must be a color accepted
by the LaTeX `color` packages. If it is of the form `#ff00ff`, Maruku
defines a color using the `\color[rgb]{r,g,b}` macro.
For example, for `#0000ff`, the macro is called as: `\color[rgb]{0,0,1}`.
=end
# Plain rendering of a code block: <pre><code>escaped source</code></pre>.
#
# With the :code_show_spaces setting, tabs and spaces are replaced by
# visible character references. When the element has a :lang attribute it
# is copied to the CODE element as both `lang` and `class`.
def to_html_code_using_pre(source)
  pre = create_html_element 'pre'
  code = Element.new 'code', pre

  escaped = Text.normalize(source.gsub(/&/,'&amp;'))
  # IE bug: apostrophes must be written as &#39;
  escaped = escaped.gsub(/\&apos;/,'&#39;').gsub(/'/,'&#39;')

  if get_setting(:code_show_spaces)
    # 187 = raquo
    # 160 = nbsp
    # 172 = not
    escaped = escaped.gsub(/\t/,'&#187;'+'&#160;'*3).gsub(/ /,'&#172;')
  end

  text = Text.new(escaped, true, nil, true )

  lang = self.attributes[:lang]
  if lang
    code.attributes['lang'] = lang
    code.attributes['class'] = lang
  end
  code << text
  pre
end
# Inline code: a CODE element holding the escaped source. A style attribute
# with the background color is added when the :code_background_color
# setting differs from the global default.
def to_html_inline_code
  code = create_html_element 'code'
  code << source2html(self.raw_code)

  background = get_setting(:code_background_color)
  if background != Globals[:code_background_color]
    code.attributes['style'] = "background-color: #{background};"
  end
  code
end
# Adds the CSS class +cl+ to element +el+, keeping any classes already set
# (space separated). Returns the new value of the class attribute.
def add_class_to(el, cl)
  existing = el.attributes['class']
  el.attributes['class'] = existing ? existing + " " + cl : cl
end
# Meant to tag a link with a class describing its target (same document,
# external http, or local).
# The feature is switched off: the method returns immediately, so the code
# after the first `return` never runs.
def add_class_to_link(a)
return # not ready yet
url = a.attributes['href']
return if not url
if url =~ /^#/
add_class_to(a, 'maruku-link-samedoc')
elsif url =~ /^http:/
add_class_to(a, 'maruku-link-external')
else
add_class_to(a, 'maruku-link-local')
end
# puts a.attributes['class']
end
# Link whose text is its own url (a leading "mailto:" is not shown).
def to_html_immediate_link
  link = create_html_element 'a'
  target = self.url
  link << Text.new(target.gsub(/^mailto:/,'')) # don't show mailto
  link.attributes['href'] = target
  add_class_to_link(link)
  link
end
# Reference-style link: looks up ref_id in the document's refs and sets
# href/title from the entry. When the reference is unknown, the error is
# reported and the children are wrapped in a SPAN instead.
def to_html_link
  a = wrap_as_element 'a'
  id = self.ref_id
  ref = @doc.refs[id]
  unless ref
    maruku_error "Could not find ref_id = #{id.inspect} for #{self.inspect}\n"+
      "Available refs are #{@doc.refs.keys.inspect}"
    tell_user "Not creating a link for ref_id = #{id.inspect}."
    return wrap_as_element('span')
  end

  href = ref[:url]
  caption = ref[:title]
  a.attributes['href'] = href if href
  a.attributes['title'] = caption if caption
  # add_class_to_link(a)
  a
end
# Inline link (url given in place): an A element around the children with
# href, and title when present. Without a url the problem is reported and
# the children are wrapped in a SPAN instead.
def to_html_im_link
  url = self.url
  unless url
    maruku_error"Could not find url in #{self.inspect}"
    # The old message interpolated `id`, which is not defined in this method.
    tell_user "Not creating a link for #{self.inspect}."
    return wrap_as_element('span')
  end

  title = self.title
  a = wrap_as_element 'a'
  a.attributes['href'] = url
  a.attributes['title'] = title if title
  a
end
# Surrounds +e+ with newline text nodes.
# Returns a three-item Array: [newline, e, newline].
def add_ws(e)
  newline = lambda { Text.new("\n") }
  [newline.call, e, newline.call]
end
##### Email address

# Rewrites every byte of +s+ as a zero-padded decimal character reference
# ("a" becomes "&#097;").
def obfuscate(s)
  s.each_byte.inject('') { |out, byte| out + ("&#%03d;" % byte) }
end
# E-mail link: href is the plain "mailto:" address, while the visible text
# is the address obfuscated into character references.
def to_html_email_address
  address = self.email
  link = create_html_element 'a'
  #link.attributes['href'] = Text.new("mailto:"+obfuscate(address),false,nil,true)
  #link.attributes.add Attribute.new('href',Text.new(
  #"mailto:"+obfuscate(address),false,nil,true))
  # Sorry, for the moment it doesn't work
  link.attributes['href'] = "mailto:#{address}"
  link << Text.new(obfuscate(address),false,nil,true)
  link
end
##### Images

# Reference-style image: src and alt come from the reference's url and
# title, and title/class/style are copied from the reference when present.
# An unknown reference is reported and replaced by a SPAN.
def to_html_image
  img = create_html_element 'img'
  id = self.ref_id
  ref = @doc.refs[id]
  unless ref
    maruku_error"Could not find id = #{id.inspect} for\n #{self.inspect}"
    tell_user "Could not create image with ref_id = #{id.inspect};"+
      " Using SPAN element as replacement."
    return wrap_as_element('span')
  end

  img.attributes['src'] = ref[:url].to_s
  img.attributes['alt'] = ref[:title].to_s
  [:title, :class, :style].each do |key|
    img.attributes[key.to_s] = ref[key] if ref[key]
  end
  img
end
# Inline image (url given in place): an IMG element with src, and alt taken
# from the title. Without a url the problem is reported and the children
# are wrapped in a SPAN instead.
def to_html_im_image
  url = self.url
  unless url
    maruku_error"Image with no url: #{self.inspect}"
    # The old message interpolated `id`, which is not defined in this method,
    # and applied a stray unary `+` to the second string.
    tell_user "Could not create image without a url;"+
      " Using SPAN element as replacement."
    return wrap_as_element('span')
  end

  title = self.title
  a = create_html_element 'img'
  a.attributes['src'] = url
  a.attributes['alt'] = title.to_s
  a
end
# Raw HTML block.
#
# When the html was parsed (@parsed_html is set), returns the children of
# its root element as an Array; if the parsed document has no root, the
# problem is reported and an empty DIV is returned. When it was not parsed,
# the offending source is shown in a red PRE of class 'markdown-html-error'.
def to_html_raw_html
  html_source = self.raw_html
  rexml_doc = @parsed_html

  unless rexml_doc # invalid
    # Creates red box with offending HTML
    tell_user "Wrapping bad html in a PRE with class 'markdown-html-error'\n"+
      add_tabs(html_source,1,'|')
    pre = Element.new('pre')
    pre.attributes['style'] = 'border: solid 3px red; background-color: pink'
    pre.attributes['class'] = 'markdown-html-error'
    pre << Text.new("HTML parse error: \n#{html_source}", true)
    return pre
  end

  root = rexml_doc.root
  # copies the @children array (FIXME is it deep?)
  return root.to_a unless root.nil?

  s = "Bug in REXML: root() of Document is nil: \n#{rexml_doc.inspect}\n"+
    "Raw HTML:\n#{html_source.inspect}"
  maruku_error s
  tell_user 'The REXML version you have has a bug, omitting HTML'
  #div << Text.new(s)
  Element.new 'div'
end
# Abbreviation: an ABBR element whose text is the first child and whose
# title attribute is set when the element has a title.
def to_html_abbr
  Element.new('abbr').tap do |abbr|
    abbr << Text.new(children[0])
    abbr.attributes['title'] = self.title if self.title
  end
end
# Footnote marker: <sup id="fnref:N"><a href="#fn:N" rel="footnote">N</a></sup>.
# The footnote id is appended to the document's footnotes_order, and N is
# the size of that list after appending.
def to_html_footnote_reference
  fid = self.footnote_id
  # save the order of used footnotes and take the next number
  num = (@doc.footnotes_order << fid).size

  link = Element.new 'a'
  link << Text.new(num.to_s)
  link.attributes['href'] = "\#fn:#{num}"
  link.attributes['rel'] = 'footnote'

  sup = Element.new 'sup'
  sup.attributes['id'] = "fnref:#{num}"
  sup << link
  sup
end
## Definition lists ###

# DL wrapper around the whole list.
def to_html_definition_list
  add_ws wrap_as_element('dl')
end

# A definition has no element of its own: its children are emitted directly.
def to_html_definition
  children_to_html
end

def to_html_definition_term
  add_ws wrap_as_element('dt')
end

def to_html_definition_data
  add_ws wrap_as_element('dd')
end
# FIXME: Ugly code
#
# Renders a table. @children is a flat list of cells: the first
# `align.size` cells form the header row and every following group of
# `align.size` cells forms one body row. Body cells get a text-align style
# taken from `align`; header cells do not.
def to_html_table
align = self.align
num_columns = align.size
head = @children.slice(0, num_columns)
rows = []
i = num_columns
# NOTE(review): with an empty `align` (num_columns == 0) and a non-empty
# @children, `i` never advances -- confirm callers never produce that.
while i<@children.size
rows << @children.slice(i, num_columns)
i += num_columns
end
table = create_html_element 'table',
[:summary, :width, :frame, :rules, :border, :cellspacing, :cellpadding]
thead = Element.new 'thead'
tr = Element.new 'tr'
array_to_html(head).each do |x| tr<<x end
thead << tr
table << thead
tbody = Element.new 'tbody'
rows.each do |row|
tr = Element.new 'tr'
array_to_html(row).each_with_index do |x,i|
x.attributes['style'] ="text-align: #{align[i].to_s};"
tr<<x
end
# NOTE(review): `<<` chains left to right, so the newline is appended to
# whatever `tbody << tr` returns -- confirm whether it lands in tbody or tr.
tbody << tr << Text.new("\n")
end
table << tbody
table
end
# Header cell: always a TH.
def to_html_head_cell
  wrap_as_element('th')
end

# Body cell: a TD, or a TH carrying the scope attribute when :scope is set.
def to_html_cell
  return wrap_as_element('th', [:scope]) if @attributes[:scope]
  wrap_as_element('td')
end
# Renders an entity as a REXML::Text node.
#
# The name is looked up in the LaTeX entity table; when the entry has a
# numeric html code, that number is used. Numeric entities are written as
# raw "&#N;" references, named ones as "&name;".
def to_html_entity
  MaRuKu::Out::Latex.need_entity_table

  entity_name = self.entity_name

  if (e = MaRuKu::Out::Latex::ENTITY_TABLE[entity_name]) && e.html_num
    entity_name = e.html_num
  end

  # Fix for Internet Explorer
  entity_name = 39 if entity_name == 'apos'

  # Integer rather than Fixnum: Fixnum was removed from Ruby, and on older
  # Rubies every Fixnum is also an Integer.
  if entity_name.kind_of? Integer
    # Entity.new(entity_name)
    Text.new('&#%d;' % [entity_name], false, nil, true)
  else
    Text.new('&%s;' % [entity_name])
  end
end
# XML processing instruction; a missing target or code becomes ''.
def to_html_xml_instr
  REXML::Instruction.new(self.target || '', self.code || '')
end
# Convert each child to html
# Returns the Array produced by array_to_html for @children.
def children_to_html
  array_to_html @children
end
# Renders every item of +array+ and returns the flat list of results.
#
# MDElement items are rendered with to_html_<node_type>, anything else with
# to_html. Items that do not respond to their method are skipped. A method
# may return one node or an Array of nodes; Arrays are spliced in.
#
# Raises RuntimeError when a rendering method returns nil.
def array_to_html(array)
  rendered = []
  array.each do |child|
    renderer = child.kind_of?(MDElement) ? "to_html_#{child.node_type}" : "to_html"

    #raise "Object does not answer to #{renderer}: #{child.class} #{child.inspect}"
    next unless child.respond_to?(renderer)

    html = child.send(renderer)
    if html.nil?
      raise "Nil html created by method #{renderer}:\n#{html.inspect}\n"+
        " for object #{child.inspect[0,300]}"
    end

    if html.kind_of?(Array)
      rendered.concat(html)
    else
      rendered << html
    end
  end
  rendered
end
# Reference definitions produce no output of their own, in HTML or LaTeX.
def to_html_ref_definition
  []
end

def to_latex_ref_definition
  []
end
end # HTML
end # out
end # MaRuKu

View file

@ -0,0 +1,563 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu
class MDDocument
Latex_preamble_enc_cjk =
"\\usepackage[C40]{fontenc}
\\usepackage[cjkjis]{ucs}
\\usepackage[utf8x]{inputenc}"
Latex_preamble_enc_utf8 =
"\\usepackage{ucs}
\\usepackage[utf8x]{inputenc}"
# Records that the LaTeX output needs package +p+; each package is listed
# only once.
def latex_require_package(p)
  self.latex_required_packages.push p unless self.latex_required_packages.include? p
end
# Render as a LaTeX fragment
# (the children's LaTeX, without preamble or document environment).
def to_latex
  children_to_latex()
end
=begin maruku_doc
Attribute: maruku_signature
Scope: document
Output: html, latex
Summary: Enables Maruku's signature.
Default: true
If false, Maruku does not append a signature to the
generated file.
=end
# Render as a complete LaTeX document
#
# The body is the LaTeX fragment (plus the signature when the
# :maruku_signature setting is on). The preamble is assembled from the
# encoding packages, one \usepackage line per required package, a fixed set
# of packages, and an optional user preamble.
# Returns a String.
def to_latex_document
body = to_latex
if get_setting(:maruku_signature)
body += render_latex_signature
end
# one "\usepackage{...}" line per package registered during rendering
required =
self.latex_required_packages.map {|p|
"\\usepackage{#{p}}\n"
}.join
=begin maruku_doc
Attribute: latex_cjk
Scope: document
Output: latex
Summary: Support for CJK characters.
If the `latex_cjk` attribute is specified, then appropriate headers
are added to the LaTeX preamble to support Japanese fonts.
You have to have these fonts installed -- and this can be a pain.
If `latex_cjk` is specified, this is added to the preamble:
<?mrk puts "ciao" ?>
<?mrk md_codeblock(Maruku::MDDocument::Latex_preamble_enc_cjk) ?>
while the default is to add this:
<?mrk md_codeblock(Maruku::MDDocument::Latex_preamble_enc_utf8) ?>
=end
encoding = get_setting(:latex_cjk) ?
Latex_preamble_enc_cjk : Latex_preamble_enc_utf8
=begin maruku_doc
Attribute: latex_preamble
Scope: document
Output: latex
Summary: User-defined preamble.
If the `latex_preamble` attribute is specified, then its value
will be used as a custom preamble.
For example:
Title: My document
Latex preamble: preamble.tex
will produce:
...
\input{preamble.tex}
...
=end
# "\input{file}" when the :latex_preamble attribute is set, else empty
user_preamble = (file = @doc.attributes[:latex_preamble]) ?
"\\input{#{file}}\n" : ""
"\\documentclass{article}
% Packages required to support encoding
#{encoding}
% Packages required by code
#{required}
% Packages always used
\\usepackage{hyperref}
\\usepackage{xspace}
\\usepackage[usenames,dvipsnames]{color}
\\hypersetup{colorlinks=true,urlcolor=blue}
#{user_preamble}
\\begin{document}
#{body}
\\end{document}
"
end
# LaTeX counterpart of the HTML signature: a rule followed by a tiny
# "Created by Maruku <date>." line linking to the Maruku home page.
# Returns a String.
def render_latex_signature
"\\vfill
\\hrule
\\vspace{1.2mm}
\\begin{tiny}
Created by \\href{http://maruku.rubyforge.org}{Maruku} #{self.nice_date}.
\\end{tiny}"
end
end end
module MaRuKu; module Out; module Latex
# Horizontal rule with half an em of space above and below.
def to_latex_hrule
  "\n\\vspace{.5em} \\hrule \\vspace{.5em}\n"
end

# Hard line break.
def to_latex_linebreak
  "\\linebreak "
end

# Paragraph: the children followed by a blank line.
def to_latex_paragraph
  "#{children_to_latex}\n\n"
end
=begin maruku_doc
Title: Input format for colors
Output: latex, html
Related: code_background_color
Admissible formats:
green
#abc
#aabbcc
=end
# \color[named]{name}
# \color[rgb]{1,0.2,0.3}
#
# Builds a LaTeX color command from a color specification.
#
# s       - "#rrggbb", "#rgb" or a color name.
# command - macro name to emit (default 'color').
#
# "#..." forms become "\command[rgb]{r,g,b}" with each channel scaled to
# 0.0-1.0 and printed with two decimals; anything else is passed through
# as "\command{s}".
def latex_color(s, command='color')
  scale =
    if s =~ /^\#(\w\w)(\w\w)(\w\w)$/
      255.0 # two digits per channel: 0-255
    elsif s =~ /^\#(\w)(\w)(\w)$/
      15.0 # one digit per channel: 0-15
    end
  return "\\#{command}{#{s}}" unless scale

  channels = [$1, $2, $3].map { |digits| digits.hex / scale }
  "\\#{command}[rgb]{%0.2f,%0.2f,%0.2f}" % channels
end
=begin maruku_doc
Attribute: code_show_spaces
Scope: global, document, element
If `true`, shows spaces and tabs in code blocks.
Example:
One space
Two spaces
Tab, space, tab
Tab, tab, tab and all is green!
{:code_show_spaces code_background_color=#ffeedd}
{:markdown}
That will produce:
One space
Two spaces
Tab, space, tab
Tab, tab, tab and all is green!
{:code_show_spaces code_background_color=#ffeedd}
=end
=begin maruku_doc
Attribute: latex_use_listings
Scope: document
Output: latex
Summary: Support for `listings` package.
Related: code_show_spaces, code_background_color, lang, code_lang
If the `latex_use_listings` attribute is specified, then
code blocks are rendered using the `listings` package.
Otherwise, a standard `verbatim` environment is used.
* If the `lang` attribute for the code block has been specified,
it gets passed to the `listings` package using the `lstset` macro.
The default lang for code blocks is specified through
the `code_lang` attribute.
\lstset{language=ruby}
Please refer to the documentation of the `listings` package for
supported languages.
If a language is not supported, the `listings` package will emit
a warning during the compilation. Just press enter and nothing
wrong will happen.
* If `code_show_spaces` is specified, then spaces and tabs will
be shown using the macro:
\lstset{showspaces=true,showtabs=true}
* The background color is given by `code_background_color`.
=end
# Code block.
#
# With the :latex_use_listings setting, the `listings` package is required
# and the code is emitted in a lstlisting environment preceded by \lstset
# lines for frame, visible spaces, background color, font and language.
# Otherwise a plain verbatim environment is used.
def to_latex_code
  source = self.raw_code

  unless get_setting(:latex_use_listings)
    return "\\begin{verbatim}#{source}\\end{verbatim}\n"
  end

  @doc.latex_require_package('listings')

  visible = get_setting(:code_show_spaces) ? 'true' : 'false'
  color = latex_color get_setting(:code_background_color)
  # '{}' is the fallback when neither the block nor the document names a language
  lang = self.attributes[:lang] || @doc.attributes[:code_lang] || '{}'

  setup = "\\lstset{columns=fixed,frame=shadowbox}" +
    "\\lstset{showspaces=#{visible},showtabs=#{visible}}\n" +
    "\\lstset{backgroundcolor=#{color}}\n" +
    "\\lstset{basicstyle=\\ttfamily\\footnotesize}\n" +
    "\\lstset{language=#{lang}}\n"

  "#{setup}\n\\begin{lstlisting}\n#{source}\n\\end{lstlisting}"
end
# Sectioning command for each header level.
TexHeaders = {
  1=>'section',
  2=>'subsection',
  3=>'subsubsection',
  4=>'paragraph'}

# Header: an unnumbered (starred) sectioning command; levels not listed in
# TexHeaders fall back to \paragraph. The section number, when available, is
# prepended to the title. When the element has an :id, a hypertarget and a
# label are emitted too.
def to_latex_header
  h = TexHeaders[self.level] || 'paragraph'

  title = children_to_latex
  if number = section_number
    title = number + title
  end

  if id = self.attributes[:id]
    # drop '#' at the beginning
    # (the old code assigned the Array [1, id.size] instead of slicing)
    id = id[1, id.size] if id[0,1] == '#'
    %{\\hypertarget{%s}{}\\%s*{{%s}}\\label{%s}\n\n} % [ id, h, title, id ]
  else
    %{\\%s*{%s}\n\n} % [ h, title]
  end
end
# Unordered list: an itemize environment, or the document's table of
# contents when the list carries the :toc attribute.
def to_latex_ul
  return @doc.toc.to_latex if self.attributes[:toc]
  wrap_as_environment('itemize')
end
# Block quote.
def to_latex_quote
  wrap_as_environment('quote')
end

# Ordered list.
def to_latex_ol
  wrap_as_environment('enumerate')
end

# List items, in both flavours, are a single \item line.
def to_latex_li
  "\\item " + children_to_latex + "\n"
end

def to_latex_li_span
  "\\item " + children_to_latex + "\n"
end
# Strong emphasis: bold text.
def to_latex_strong
  "\\textbf{" + children_to_latex + "}"
end

# Emphasis.
def to_latex_emphasis
  "\\emph{" + children_to_latex + "}"
end

# Wraps the children in a group that starts with the command +c+.
def wrap_as_span(c)
  "{" + "#{c}" + " " + children_to_latex + "}"
end
# Wraps the children in a \begin{name} ... \end{name} environment.
def wrap_as_environment(name)
  opening = "\\begin{#{name}}%\n"
  closing = "\n\\end{#{name}}\n"
  "#{opening}#{children_to_latex}#{closing}"
end
# Characters copied to the output unchanged: ASCII letters, as
# one-character Strings.
SAFE_CHARS = Set.new(('a'..'z').to_a + ('A'..'Z').to_a)

# the ultimate escaping
# (is much better than using \verb)
#
# Spaces become '~', ASCII letters are kept, and every other byte is written
# as \char<decimal byte value>.
#
# each_byte yields Integers, so the byte is compared numerically and turned
# into a String before the Set lookup; comparing it with character literals
# directly never matches on Rubies where ?x is a String.
def latex_escape(source)
  escaped = ""
  source.each_byte do |b|
    if b == 32 # space
      escaped << '~'
    elsif SAFE_CHARS.include?(b.chr)
      escaped << b.chr
    else
      escaped << ("\\char%d" % b)
    end
  end
  escaped
end
# Inline code: the escaped source in typewriter font inside a colorbox
# using the :code_background_color setting.
def to_latex_inline_code
  # Convert to printable latex chars
  escaped = latex_escape(self.raw_code)
  box = latex_color(get_setting(:code_background_color), 'colorbox')
  "#{box}{\\tt #{escaped}}"
end
# Link whose text is its own url (a leading "mailto:" is not shown).
# Urls starting with '#' become \hyperlink to that target; all others \href.
def to_latex_immediate_link
  target = self.url
  # gsub('~','$\sim$')
  text = latex_escape(target.gsub(/^mailto:/,'')) # don't show mailto
  return "\\href{#{target}}{#{text}}" unless target[0,1] == '#'
  "\\hyperlink{#{target[1,target.size]}}{#{text}}"
end
# Inline link around the children. Urls starting with '#' become
# \hyperlink to that target; all others \href.
def to_latex_im_link
  target = self.url
  return "\\href{#{target}}{#{children_to_latex}}" unless target[0,1] == '#'
  "\\hyperlink{#{target[1,target.size]}}{#{children_to_latex}}"
end
# Reference-style link around the children. An unknown reference is
# reported on stderr and the children are emitted without a link. Urls
# starting with '#' become \hyperlink to that target; all others \href.
def to_latex_link
  id = self.ref_id
  ref = @doc.refs[id]
  unless ref
    $stderr.puts "Could not find id = '#{id}'"
    return children_to_latex
  end

  target = ref[:url]
  #title = ref[:title] || 'no title'
  return "\\href{#{target}}{#{children_to_latex}}" unless target[0,1] == '#'
  "\\hyperlink{#{target[1,target.size]}}{#{children_to_latex}}"
end
# E-mail link: \href to the mailto: url, showing the escaped address.
def to_latex_email_address
  address = self.email
  shown = latex_escape(address)
  "\\href{mailto:#{address}}{#{shown}}"
end
# Table as a tabular environment. @children is a flat list of cells: the
# first `align.size` cells are the header row (followed by \hline) and each
# following group of `align.size` cells is one row. Column alignment comes
# from `align` (:center, :left, :right); columns are separated by '|'.
def to_latex_table
  align = self.align
  num_columns = align.size

  head = @children.slice(0, num_columns)
  rows = []
  offset = num_columns
  while offset < @children.size
    rows << @children.slice(offset, num_columns)
    offset += num_columns
  end

  letters = {:center=>'c',:left=>'l',:right=>'r'}
  spec = align.map { |a| letters[a] }.join('|')

  pieces = ["\\begin{tabular}{#{spec}}\n"]
  pieces << array_to_latex(head, '&') + "\\\\" +"\n"
  pieces << "\\hline \n"
  rows.each do |row|
    pieces << array_to_latex(row, '&') + "\\\\" +"\n"
  end
  pieces << "\\end{tabular}"
  # puts table in its own paragraph
  pieces << "\n\n"
  pieces.join
end
# Cells, header or not, are just their children; the table does the layout.
def to_latex_head_cell
  children_to_latex
end

def to_latex_cell
  children_to_latex
end
# Footnote: the text of the referenced footnote in a \footnote command.
#
# An unknown footnote is reported on stderr and rendered as the empty
# string (array_to_latex raises on nil, so nil must not be returned).
def to_latex_footnote_reference
  id = self.footnote_id
  f = @doc.footnotes[id]
  if f
    "\\footnote{#{f.children_to_latex.strip}} "
  else
    # the old message interpolated `fid`, which is not defined here
    $stderr.puts "Could not find footnote '#{id}'"
    ""
  end
end
# Raw HTML has no LaTeX rendering here: it is dropped and an empty string is
# returned.
def to_latex_raw_html
  # '{\bf Raw HTML removed in latex version }'
  ''
end
## Definition lists ###
# Wraps the rendered children in a description environment.
def to_latex_definition_list
  "\\begin{description}\n" + children_to_latex + "\\end{description}\n"
end
# Renders one definition entry: an \item[...] for every term, followed by the
# LaTeX of every definition body.
def to_latex_definition
  labels = self.terms.map { |term| "\n\\item[#{term.children_to_latex}] " }
  bodies = self.definitions.map { |definition| "#{definition.children_to_latex} \n" }
  (labels + bodies).join
end
# An abbreviation gets no special markup in LaTeX: only its children are
# rendered.
def to_latex_abbr
children_to_latex
end
# Images are not rendered in the LaTeX output; this method always returns an
# empty string (unless maruku_error itself raises).
#
# A missing reference is reported through maruku_error together with the list
# of known reference ids; a known reference only produces a notice on $stderr.
def to_latex_image
  id = self.ref_id
  ref = @doc.refs[id]
  unless ref
    # was `@docs.refs`: @docs is never set, so building the message crashed
    maruku_error "Could not find ref #{id.inspect} for image.\n" +
      "Available are: #{@doc.refs.keys.inspect}"
    return ""
  end
  $stderr.puts "Images not supported yet (#{ref[:url]})"
  # "{\\bf Images not supported yet (#{latex_escape(url)})}"
  ""
end
# Converts every child of this element to LaTeX (via array_to_latex) and
# returns the concatenated result.
def children_to_latex
array_to_latex(@children)
end
# Renders a list of nodes to LaTeX and joins the pieces with join_char.
#
# MDElement nodes are rendered with "to_latex_<node_type>", everything else
# with "to_latex". Nodes that do not respond to their renderer are skipped.
# A renderer may return a String or an Array of pieces; nil raises.
def array_to_latex(array, join_char='')
  pieces = []
  array.each do |node|
    renderer = if node.kind_of?(MDElement)
      "to_latex_#{node.node_type}"
    else
      "to_latex"
    end
    # raise "Object does not answer to #{method}: #{c.class} #{c.inspect[0,100]}"
    next unless node.respond_to?(renderer)

    rendered = node.send(renderer)
    if rendered.nil?
      raise "Nil html for #{node.inspect} created with method #{renderer}"
    end

    if rendered.kind_of?(Array)
      pieces.concat(rendered)
    else
      pieces << rendered
    end
  end
  # puts a space after commands if needed
  # e.each_index do |i|
  # if e[i] =~ /\\\w+\s*$/ # command
  # if (s=e[i+1]) && s[0] == ?\ # space
  # e[i] = e[i] + "\\ "
  # end
  # end
  # end
  pieces.join(join_char)
end
end end end # MaRuKu::Out::Latex

View file

@ -0,0 +1,367 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'rexml/document'
module MaRuKu; module Out; module Latex
include REXML
# Renders an HTML entity. The entity is looked up in ENTITY_TABLE (built on
# first use) under self.entity_name. Unknown entities are reported through
# maruku_error and render as "". Known entities register their LaTeX packages
# with the document and render as their LaTeX string followed by "{}"; if the
# entry carries no LaTeX string the user is told and the entity name itself is
# returned.
def to_latex_entity
  MaRuKu::Out::Latex.need_entity_table

  name = self.entity_name
  found = ENTITY_TABLE[name]
  unless found
    maruku_error "I don't know how to translate entity '#{name}' "+
      "to LaTeX."
    return ""
  end

  latex = found.latex_string
  found.latex_packages.each { |package| @doc.latex_require_package package }

  # if replace =~ /^\\/
  # replace = replace + " "
  # end
  return latex + "{}" if latex

  tell_user "Cannot translate entity #{name.inspect} to LaTeX."
  name
end
# One row of the entity conversion table.
class LatexEntity
  # Numeric character code of the entity. Declared as Integer rather than
  # Fixnum: Fixnum no longer exists on Ruby 3.2+, and Integer is its
  # superclass on older versions.
  safe_attr_accessor :html_num, Integer
  # Entity name as written in HTML
  safe_attr_accessor :html_entity, String
  # LaTeX source that replaces the entity
  safe_attr_accessor :latex_string, String
  # Names of the LaTeX packages the replacement needs
  safe_attr_accessor :latex_packages, Array
end
# Builds the entity table on first use; does nothing once ENTITY_TABLE has
# entries.
def Latex.need_entity_table
  return unless ENTITY_TABLE.empty?
  Latex.init_entity_table
end
# Fills ENTITY_TABLE from XML_TABLE. Every <char> element becomes one
# LatexEntity, stored twice: under its numeric code (Integer) and under its
# entity name (String).
def Latex.init_entity_table
  # Placeholders used inside the table for characters that are awkward to
  # write in a quoted XML attribute. Applied in this order.
  placeholders = [
    [/@DOUBLEQUOT/, '"'],
    [/@QUOT/, "'"],
    [/@GT/, ">"],
    [/@LT/, "<"],
    [/@AMP/, "&"]
  ]

  Document.new(XML_TABLE).elements.each("//char") do |char|
    attrs = char.attributes
    code = attrs['num'].to_i
    entity_name = attrs['name']
    packages = attrs['package']
    latex = attrs['convertTo']
    placeholders.each { |pattern, literal| latex.gsub!(pattern, literal) }
    latex.freeze

    entity = LatexEntity.new
    entity.html_num = code
    entity.html_entity = entity_name
    entity.latex_string = latex
    entity.latex_packages = packages ? packages.split : []

    ENTITY_TABLE[code] = entity
    ENTITY_TABLE[entity_name] = entity
  end
end
# Lookup table from entity number (Integer) and entity name (String) to
# LatexEntity. Starts empty and is filled lazily by Latex.init_entity_table.
ENTITY_TABLE = {}
# The following is a conversion chart for html elements, courtesy of
# text2html
#
# Attribute values use the placeholders @DOUBLEQUOT, @QUOT, @GT, @LT and @AMP,
# which Latex.init_entity_table expands after parsing.
#
# Corrections relative to the original chart:
# * the numeric codes of cap/cup, lceil/rceil and lang/rang were swapped
#   (HTML defines 8745=cap, 8746=cup, 8968=lceil, 8969=rceil, 9001=lang,
#   9002=rang), so numeric references rendered the opposite symbol;
# * two commented-out rows contained a nested "<!--"; "--" is not allowed
#   inside an XML comment and strict parsers reject the document.
XML_TABLE ="
<chars>
<char num='913' name='Alpha' convertTo='$A$' />
<char num='914' name='Beta' convertTo='$B$' />
<char num='915' name='Gamma' convertTo='$\\Gamma$' />
<char num='916' name='Delta' convertTo='$\\Delta$' />
<char num='917' name='Epsilon' convertTo='$E$' />
<char num='918' name='Zeta' convertTo='$Z$' />
<char num='919' name='Eta' convertTo='$H$' />
<char num='920' name='Theta' convertTo='$\\Theta$' />
<char num='921' name='Iota' convertTo='$I$' />
<char num='922' name='Kappa' convertTo='$K$' />
<char num='923' name='Lambda' convertTo='$\\Lambda$' />
<char num='924' name='Mu' convertTo='$M$' />
<char num='925' name='Nu' convertTo='$N$' />
<char num='926' name='Xi' convertTo='$\\Xi$' />
<char num='927' name='Omicron' convertTo='$O$' />
<char num='928' name='Pi' convertTo='$\\Pi$' />
<char num='929' name='Rho' convertTo='$P$' />
<char num='931' name='Sigma' convertTo='$\\Sigma$' />
<char num='932' name='Tau' convertTo='$T$' />
<char num='933' name='Upsilon' convertTo='$Y$' />
<char num='934' name='Phi' convertTo='$\\Phi$' />
<char num='935' name='Chi' convertTo='$X$' />
<char num='936' name='Psi' convertTo='$\\Psi$' />
<char num='937' name='Omega' convertTo='$\\Omega$' />
<char num='945' name='alpha' convertTo='$\\alpha$' />
<char num='946' name='beta' convertTo='$\\beta$' />
<char num='947' name='gamma' convertTo='$\\gamma$' />
<char num='948' name='delta' convertTo='$\\delta$' />
<char num='949' name='epsilon' convertTo='$\\epsilon$' />
<char num='950' name='zeta' convertTo='$\\zeta$' />
<char num='951' name='eta' convertTo='$\\eta$' />
<char num='952' name='theta' convertTo='$\\theta$' />
<char num='953' name='iota' convertTo='$\\iota$' />
<char num='954' name='kappa' convertTo='$\\kappa$' />
<char num='955' name='lambda' convertTo='$\\lambda$' />
<char num='956' name='mu' convertTo='$\\mu$' />
<char num='957' name='nu' convertTo='$\\nu$' />
<char num='958' name='xi' convertTo='$\\xi$' />
<char num='959' name='omicron' convertTo='$o$' />
<char num='960' name='pi' convertTo='$\\pi$' />
<char num='961' name='rho' convertTo='$\\rho$' />
<char num='963' name='sigma' convertTo='$\\sigma$' />
<char num='964' name='tau' convertTo='$\\tau$' />
<char num='965' name='upsilon' convertTo='$\\upsilon$' />
<char num='966' name='phi' convertTo='$\\phi$' />
<char num='967' name='chi' convertTo='$\\chi$' />
<char num='968' name='psi' convertTo='$\\psi$' />
<char num='969' name='omega' convertTo='$\\omega$' />
<char num='962' name='sigmaf' convertTo='$\\varsigma$' />
<char num='977' name='thetasym' convertTo='$\\vartheta$' />
<char num='982' name='piv' convertTo='$\\varpi$' />
<char num='8230' name='hellip' convertTo='\\ldots' />
<char num='8242' name='prime' convertTo='$\\prime$' />
<char num='8254' name='oline' convertTo='-' />
<char num='8260' name='frasl' convertTo='/' />
<char num='8472' name='weierp' convertTo='$\\wp$' />
<char num='8465' name='image' convertTo='$\\Im$' />
<char num='8476' name='real' convertTo='$\\Re$' />
<char num='8501' name='alefsym' convertTo='$\\aleph$' />
<char num='8226' name='bull' convertTo='$\\bullet$' />
<char num='8482' name='trade' convertTo='$^{\\rm TM}$' /> <!-- \texttrademark -->
<char num='8592' name='larr' convertTo='$\\leftarrow$' />
<char num='8594' name='rarr' convertTo='$\\rightarrow$' />
<char num='8593' name='uarr' convertTo='$\\uparrow$' />
<char num='8595' name='darr' convertTo='$\\downarrow$' />
<char num='8596' name='harr' convertTo='$\\leftrightarrow$' />
<char num='8629' name='crarr' convertTo='$\\hookleftarrow$' />
<char num='8657' name='uArr' convertTo='$\\Uparrow$' />
<char num='8659' name='dArr' convertTo='$\\Downarrow$' />
<char num='8656' name='lArr' convertTo='$\\Leftarrow$' />
<char num='8658' name='rArr' convertTo='$\\Rightarrow$' />
<char num='8660' name='hArr' convertTo='$\\Leftrightarrow$' />
<char num='8704' name='forall' convertTo='$\\forall$' />
<char num='8706' name='part' convertTo='$\\partial$' />
<char num='8707' name='exist' convertTo='$\\exists$' />
<char num='8709' name='empty' convertTo='$\\emptyset$' />
<char num='8711' name='nabla' convertTo='$\\nabla$' />
<char num='8712' name='isin' convertTo='$\\in$' />
<char num='8715' name='ni' convertTo='$\\ni$' />
<char num='8713' name='notin' convertTo='$\\notin$' />
<char num='8721' name='sum' convertTo='$\\sum$' />
<char num='8719' name='prod' convertTo='$\\prod$' />
<char num='8722' name='minus' convertTo='$-$' />
<char num='8727' name='lowast' convertTo='$\\ast$' />
<char num='8730' name='radic' convertTo='$\\surd$' />
<char num='8733' name='prop' convertTo='$\\propto$' />
<char num='8734' name='infin' convertTo='$\\infty$' />
<char num='8736' name='ang' convertTo='$\\angle$' />
<char num='8743' name='and' convertTo='$\\wedge$' />
<char num='8744' name='or' convertTo='$\\vee$' />
<char num='8745' name='cap' convertTo='$\\cap$' />
<char num='8746' name='cup' convertTo='$\\cup$' />
<char num='8747' name='int' convertTo='$\\int$' />
<char num='8756' name='there4' convertTo='$\\therefore$' package='amssymb' /> <!-- only AMS -->
<char num='8764' name='sim' convertTo='$\\sim$' />
<char num='8776' name='asymp' convertTo='$\\approx$' />
<char num='8773' name='cong' convertTo='$\\cong$' />
<char num='8800' name='ne' convertTo='$\\neq$' />
<char num='8801' name='equiv' convertTo='$\\equiv$' />
<char num='8804' name='le' convertTo='$\\leq$' />
<char num='8805' name='ge' convertTo='$\\geq$' />
<char num='8834' name='sub' convertTo='$\\subset$' />
<char num='8835' name='sup' convertTo='$\\supset$' />
<!-- <char num='8838' name='sube' convertTo='$\\subseteq$' />-->
<char num='8839' name='supe' convertTo='$\\supseteq$' />
<!-- <char num='8836' name='nsub' convertTo='$\\nsubset$' /> (only AMS) -->
<char num='8853' name='oplus' convertTo='$\\oplus$' />
<char num='8855' name='otimes' convertTo='$\\otimes$' />
<char num='8869' name='perp' convertTo='$\\perp$' />
<char num='8901' name='sdot' convertTo='$\\cdot$' />
<char num='8968' name='lceil' convertTo='$\\lceil$' />
<char num='8969' name='rceil' convertTo='$\\rceil$' />
<char num='8970' name='lfloor' convertTo='$\\lfloor$' />
<char num='8971' name='rfloor' convertTo='$\\rfloor$' />
<char num='9001' name='lang' convertTo='$\\langle$' />
<char num='9002' name='rang' convertTo='$\\rangle$' />
<char num='9674' name='loz' convertTo='$\\lozenge$' package='amssymb' /> <!-- only AMS -->
<char num='9824' name='spades' convertTo='$\\spadesuit$' />
<char num='9827' name='clubs' convertTo='$\\clubsuit$' />
<char num='9829' name='hearts' convertTo='$\\heartsuit$' />
<char num='9830' name='diams' convertTo='$\\diamondsuit$' />
<char num='38' name='amp' convertTo='\\@AMP' />
<!-- <char num='34' name='quot' convertTo='\\@DOUBLEQUOT' /> XXX -->
<char num='34' name='quot' convertTo='\"' />
<char num='39' name='apos' convertTo=\"'\" />
<char num='169' name='copy' convertTo='\\copyright' />
<char num='60' name='lt' convertTo='$@LT$' />
<char num='62' name='gt' convertTo='$@GT$' />
<char num='338' name='OElig' convertTo='\\OE' />
<char num='339' name='oelig' convertTo='\\oe' />
<char num='352' name='Scaron' convertTo='\\v{S}' />
<char num='353' name='scaron' convertTo='\\v{s}' />
<char num='376' name='Yuml' convertTo='\\\"Y' />
<char num='710' name='circ' convertTo='\\textasciicircum' />
<char num='732' name='tilde' convertTo='\\textasciitilde' />
<char num='8211' name='ndash' convertTo='--' />
<char num='8212' name='mdash' convertTo='---' />
<char num='8216' name='lsquo' convertTo='`' />
<char num='8217' name='rsquo' convertTo=\"'\" /> <!-- XXXX -->
<char num='8220' name='ldquo' convertTo='``' />
<char num='8221' name='rdquo' convertTo=\"''\" /> <!-- XXXX -->
<char num='8224' name='dagger' convertTo='\\dag' />
<char num='8225' name='Dagger' convertTo='\\ddag' />
<char num='8240' name='permil' convertTo='\\permil' package='wasysym' /> <!-- wasysym package -->
<char num='8364' name='euro' convertTo='\\euro' package='eurosym' /> <!-- eurosym package -->
<char num='8249' name='lsaquo' convertTo='\\guilsinglleft' package='aeguill'/>
<char num='8250' name='rsaquo' convertTo='\\guilsinglright' package='aeguill' />
<!-- <char num='160' name='nbsp' convertTo='\\nolinebreak' />-->
<char num='160' name='nbsp' convertTo='~' />
<char num='161' name='iexcl' convertTo='\\textexclamdown' />
<char num='163' name='pound' convertTo='\\pounds' />
<char num='164' name='curren' convertTo='\\currency' package='wasysym' /> <!-- wasysym package -->
<char num='165' name='yen' convertTo='\\textyen' package='textcomp'/> <!-- textcomp -->
<char num='166' name='brvbar' convertTo='\\brokenvert' /> <!-- wasysym -->
<char num='167' name='sect' convertTo='\\S' />
<char num='171' name='laquo' convertTo='\\guillemotleft' package='aeguill'/>
<char num='187' name='raquo' convertTo='\\guillemotright' package='aeguill'/>
<char num='174' name='reg' convertTo='\\textregistered' />
<char num='170' name='ordf' convertTo='\\textordfeminine' />
<char num='172' name='not' convertTo='$\\neg$' />
<!-- <char num='176' name='deg' convertTo='$\\degree$' /> (mathabx) -->
<char num='176' name='deg' convertTo='\\textdegree' package='textcomp'/>
<char num='177' name='plusmn' convertTo='$\\pm$' />
<char num='180' name='acute' convertTo='@QUOT' />
<char num='181' name='micro' convertTo='$\\mu$' />
<char num='182' name='para' convertTo='\\P' />
<char num='183' name='middot' convertTo='$\\cdot$' />
<char num='186' name='ordm' convertTo='\\textordmasculine' />
<char num='162' name='cent' convertTo='\\cent' package='wasysym' />
<char num='185' name='sup1' convertTo='$^1$' />
<char num='178' name='sup2' convertTo='$^2$' />
<char num='179' name='sup3' convertTo='$^3$' />
<char num='189' name='frac12' convertTo='$\\frac{1}{2}$' />
<char num='188' name='frac14' convertTo='$\\frac{1}{4}$' />
<char num='190' name='frac34' convertTo='$\\frac{3}{4}$' />
<char num='192' name='Agrave' convertTo='\\`A' />
<char num='193' name='Aacute' convertTo='\\@QUOTA' />
<char num='194' name='Acirc' convertTo='\\^A' />
<char num='195' name='Atilde' convertTo='\\~A' />
<char num='196' name='Auml' convertTo='\\@DOUBLEQUOTA' />
<char num='197' name='Aring' convertTo='\\AA' />
<char num='198' name='AElig' convertTo='\\AE' />
<char num='199' name='Ccedil' convertTo='\\c{C}' />
<char num='200' name='Egrave' convertTo='\\`E' />
<char num='201' name='Eacute' convertTo='\\@QUOTE' />
<char num='202' name='Ecirc' convertTo='\\^E' />
<char num='203' name='Euml' convertTo='\\@DOUBLEQUOTE' />
<char num='204' name='Igrave' convertTo='\\`I' />
<char num='205' name='Iacute' convertTo='\\@QUOTI' />
<char num='206' name='Icirc' convertTo='\\^I' />
<char num='207' name='Iuml' convertTo='\\\"I' />
<char num='208' name='ETH' convertTo='$\\eth$' /> <!-- AMS -->
<char num='209' name='Ntilde' convertTo='\\~N' />
<char num='210' name='Ograve' convertTo='\\`O' />
<char num='211' name='Oacute' convertTo='\\@QUOT O' />
<char num='212' name='Ocirc' convertTo='\\^O' />
<char num='213' name='Otilde' convertTo='\\~O' />
<char num='214' name='Ouml' convertTo='\\@DOUBLEQUOTO' />
<char num='215' name='times' convertTo='$\\times$' />
<char num='216' name='Oslash' convertTo='\\O' />
<char num='217' name='Ugrave' convertTo='\\`U' />
<char num='218' name='Uacute' convertTo='\\@QUOTU' />
<char num='219' name='Ucirc' convertTo='\\^U' />
<char num='220' name='Uuml' convertTo='\\@DOUBLEQUOTU' />
<char num='221' name='Yacute' convertTo='\\@QUOTY' />
<char num='223' name='szlig' convertTo='\\ss' />
<char num='224' name='agrave' convertTo='\\`a' />
<char num='225' name='aacute' convertTo='\\@QUOTa' />
<char num='226' name='acirc' convertTo='\\^a' />
<char num='227' name='atilde' convertTo='\\~a' />
<char num='228' name='auml' convertTo='\\@DOUBLEQUOTa' />
<char num='229' name='aring' convertTo='\\aa' />
<char num='230' name='aelig' convertTo='\\ae' />
<char num='231' name='ccedil' convertTo='\\c{c}' />
<char num='232' name='egrave' convertTo='\\`e' />
<char num='233' name='eacute' convertTo='\\@QUOTe' />
<char num='234' name='ecirc' convertTo='\\^e' />
<char num='235' name='euml' convertTo='\\@DOUBLEQUOTe' />
<char num='236' name='igrave' convertTo='\\`i' />
<char num='237' name='iacute' convertTo='\\@QUOTi' />
<char num='238' name='icirc' convertTo='\\^i' />
<char num='239' name='iuml' convertTo='\\@DOUBLEQUOTi' />
<char num='240' name='eth' convertTo='$\\eth$' package='amssymb'/> <!-- -->
<char num='241' name='ntilde' convertTo='\\~n' />
<char num='242' name='ograve' convertTo='\\`o' />
<char num='243' name='oacute' convertTo='\\@QUOTo' />
<char num='244' name='ocirc' convertTo='\\^o' />
<char num='245' name='otilde' convertTo='\\~o' />
<char num='246' name='ouml' convertTo='\\@DOUBLEQUOTo' />
<!-- <char num='247' name='divide' convertTo='$\\divide$' /> -->
<char num='248' name='oslash' convertTo='\\o' />
<char num='249' name='ugrave' convertTo='\\`u' />
<char num='250' name='uacute' convertTo='\\@QUOTu' />
<char num='251' name='ucirc' convertTo='\\^u' />
<char num='252' name='uuml' convertTo='\\@DOUBLEQUOTu' />
<char num='253' name='yacute' convertTo='\\@QUOTy' />
<char num='255' name='yuml' convertTo='\\@DOUBLEQUOTy' />
<char num='222' name='THORN' convertTo='\\Thorn' package='wasysym' />
<char num='254' name='thorn' convertTo='\\thorn' package='wasysym' />
</chars>"
end end end

View file

@ -0,0 +1,64 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
# LaTeX escaping helpers added to String.
class String
  # These are TeX's special characters; they are escaped with a backslash.
  # Stored as one-character Strings so they compare equal to the characters
  # produced while scanning (a `?x` literal is an Integer on Ruby 1.8 but a
  # String on 1.9+, which made byte-wise comparison never match).
  LATEX_ADD_SLASH = ['{', '}', '$', '&', '#', '_', '%']

  # These, we transform to {\tt \char<ascii code>}
  LATEX_TO_CHARCODE = ['^', '~', '>', '<']

  # Returns a copy of +s+ with TeX special characters made printable.
  # Note that the receiver is not used; the text to escape is the argument.
  #
  # Works character by character, so multi-byte characters pass through
  # untouched.
  def escape_to_latex(s)
    s.each_char.map do |c|
      if LATEX_TO_CHARCODE.include?(c)
        # all members are ASCII, so the first byte is the character code
        "{\\tt \\char#{c.unpack('C').first}}"
      elsif LATEX_ADD_SLASH.include?(c)
        "\\" + c
      elsif c == "\\"
        # there is no backslash in cmr10 fonts
        "$\\backslash$"
      else
        c
      end
    end.join
  end

  # escapes special characters, then applies the OtherGoodies substitutions
  def to_latex
    s = escape_to_latex(self)
    OtherGoodies.each do |k, v|
      s.gsub!(k, v)
    end
    s
  end

  # other things that are good on the eyes
  OtherGoodies = {
    /(\s)LaTeX/ => '\1\\LaTeX\\xspace ', # XXX not if already \LaTeX
    # 'HTML' => '\\textsc{html}\\xspace ',
    # 'PDF' => '\\textsc{pdf}\\xspace '
  }
end

View file

@ -0,0 +1,164 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
# Markdown output helpers added to String.
class String
  # XXX: markdown escaping
  # Currently returns the string unchanged; the argument is ignored.
  def to_md(c=nil)
    to_s
  end

  # Splits on whitespace and appends one space to every word:
  #   " andrea censi " => ["andrea ", "censi "]
  def mysplit
    split.collect { |word| word + " " }
  end
end
module MaRuKu; module Out; module Markdown
DefaultLineLength = 40
# Default Markdown rendering of an element: the rendering of its children.
# +context+ is passed through to children_to_md unchanged.
def to_md(context={})
children_to_md(context)
end
# Renders a paragraph: the children wrapped at context[:line_length]
# (DefaultLineLength when unset), followed by a newline.
def to_md_paragraph(context)
  width = context[:line_length] || DefaultLineLength
  wrapped = wrap(@children, width, context)
  wrapped + "\n"
end
# Renders a list item holding inline content: the children are wrapped four
# columns narrower than the line length, prefixed line by line through
# add_tabs, and the first character of the result is overwritten with '*'.
def to_md_li_span(context)
  width = (context[:line_length] || DefaultLineLength) - 2
  wrapped = wrap(@children, width - 2, context)
  item = add_tabs(wrapped, 1, ' ')
  item[0] = ?*
  item + "\n"
end
# Renders an abbreviation definition line: "*[abbr]: text".
# +context+ is not used.
def to_md_abbr_def(context)
  "*[" + "#{self.abbr}" + "]: " + "#{self.text}" + "\n"
end
# Renders an ordered list. Every item is wrapped, prefixed through add_tabs,
# and its first four characters are then overwritten with the item number
# ("1. ", "2. ", ...). A blank line terminates the list.
def to_md_ol(context)
  width = (context[:line_length] || DefaultLineLength) - 2
  items = []
  self.children.each_with_index do |li, index|
    wrapped = wrap(li.children, width - 2, context)
    item = add_tabs(wrapped, 1, ' ') + "\n"
    item[0, 4] = "#{index + 1}. "[0, 4]
    items << item
  end
  items.join + "\n"
end
# Renders an unordered list. Every item is wrapped, passed through
# add_indent, and the first character of the result is overwritten with "-".
# A blank line terminates the list.
def to_md_ul(context)
  width = (context[:line_length] || DefaultLineLength) - 2
  items = self.children.map do |li|
    item = add_indent(wrap(li.children, width - 2, context))
    item[0, 1] = "-"
    item
  end
  items.join + "\n"
end
# Returns a copy of +s+ in which every line is prefixed with +char+.
# A trailing newline on +s+ is kept on the result.
#
# (The previous version built the indented text and then returned the
# original, unindented argument.)
def add_indent(s,char=" ")
  t = s.split("\n").map{|x| char+x }.join("\n")
  # split drops the final newline, so put it back when the input had one
  t << "\n" if s[-1, 1] == "\n"
  t
end
# Converts every child of this element to Markdown (via array_to_md) and
# returns the concatenated result.
def children_to_md(context)
array_to_md(@children, context)
end
# Lays out a list of inline nodes as text lines of at most roughly
# +line_length+ characters.
#
# Strings are cut into words (String#mysplit); any other node contributes the
# piece(s) returned by its to_md(context). A piece that would push the
# current line past +line_length+ starts a new line. An MDElement of type
# :linebreak ends the current line with " \n". The result always ends with a
# newline.
def wrap(array, line_length, context)
  out = ""
  line = ""
  array.each do |node|
    if node.kind_of?(MDElement) && node.node_type == :linebreak
      out << line.strip << " \n"
      line = ""
      next
    end

    pieces = node.kind_of?(String) ? node.to_md.mysplit : [node.to_md(context)].flatten

    pieces.each do |piece|
      if piece.size + line.size > line_length
        out << line.strip << "\n"
        line = ""
      end
      line << piece
    end
  end
  out << line.strip << "\n" if line.size > 0
  out << "\n" unless out[-1, 1] == "\n"
  out
end
# Renders a list of nodes to Markdown and joins the pieces with join_char.
#
# MDElement nodes are rendered with "to_md_<node_type>" when they respond to
# it; everything else, and elements without a specific renderer, use "to_md".
# The renderer receives +context+ and may return a String or an Array of
# pieces; nil raises.
def array_to_md(array, context, join_char='')
  pieces = []
  array.each do |node|
    renderer = node.kind_of?(MDElement) ? "to_md_#{node.node_type}" : "to_md"
    # raise "Object does not answer to #{method}: #{c.class} #{c.inspect[0,100]}"
    # tell_user "Using default for #{c.node_type}"
    renderer = 'to_md' unless node.respond_to?(renderer)

    rendered = node.send(renderer, context)
    if rendered.nil?
      raise "Nil md for #{node.inspect} created with method #{renderer}"
    end

    if rendered.kind_of?(Array)
      pieces.concat(rendered)
    else
      pieces << rendered
    end
  end
  pieces.join(join_char)
end
end end end
module MaRuKu; class MDDocument
  # Keep the existing to_md reachable as old_md.
  alias old_md to_md

  # Document-level Markdown rendering. Currently a plain pass-through to the
  # aliased implementation.
  def to_md(context={})
    old_md(context)
  end
end end

View file

@ -0,0 +1,53 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu
  class MDElement
    # Strips all formatting from the string
    def to_s
      children_to_s
    end

    # Concatenation of the children (Array#join).
    def children_to_s
      @children.join
    end

    # Generate an id for headers. Assumes @children is set.
    #
    # The plain text of the children is turned into an identifier: spaces
    # become underscores, the text is downcased and every character other than
    # a word character or underscore is removed. If nothing is left, a
    # sequential "id<N>" based on the global counter $uid is used instead.
    def generate_id
      slug = children_to_s.gsub(/ /, '_').downcase.gsub(/[^\w_]/, '').strip
      return slug unless slug.size == 0

      $uid = ($uid || 0) + 1
      "id#{$uid}"
    end
  end
end

View file

@ -0,0 +1,184 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
# Boring stuff with strings.
module MaRuKu; module Strings
# Prefixes every line of +s+ with +char+ repeated +n+ times.
# Lines are rejoined with "\n"; a trailing newline on +s+ is not preserved.
def add_tabs(s,n=1,char="\t")
  indented = s.split("\n").collect do |line|
    char * n + line
  end
  indented.join("\n")
end
TabSize = 4;
# Splits +s+ into an array of lines on "\n" (String#split, so trailing empty
# lines are dropped).
def split_lines(s)
s.split("\n")
end
# This parses email headers. Returns a hash.
#
# +hash[:data]+ is the message.
#
# A header block is recognized only at the very start of +s+: one or more
# "Key: value" lines followed by an empty line. Without such a block the
# whole input is returned as +hash[:data]+.
#
# Keys are downcased, space becomes underscore, converted to symbols.
#
#   My key: true
#
# becomes:
#
#   {:my_key => true}
#
# Only the first colon of a header line separates key from value, so values
# may themselves contain colons (URLs, times, ...).
def parse_email_headers(s)
  keys = {}
  match = (s =~ /((\w[\w\s]+: .*\n)+)\n/)
  if match != 0
    keys[:data] = s
  else
    keys[:data] = $'
    headers = $1
    headers.split("\n").each do |l|
      # limit 2: a plain split(':') cut values at their first colon
      k, v = l.split(':', 2)
      k, v = normalize_key_and_value(k, v)
      k = k.to_sym
      # puts "K = #{k}, V=#{v}"
      keys[k] = v
    end
  end
  keys
end
# Normalizes one header pair and returns [key, value].
#
# The key is stripped, downcased and its spaces become underscores (it stays
# a String). The value is stripped; a missing value defaults to true, and the
# words yes/true and no/false (any case) become the booleans true and false.
def normalize_key_and_value(k,v)
  value = v ? v.strip : true # no value defaults to true
  # check synonyms
  case value.to_s.downcase
  when 'yes', 'true' then value = true
  when 'no', 'false' then value = false
  end
  key = k.strip.downcase.gsub(' ', '_')
  [key, value]
end
# Returns the number of leading spaces, considering that
# a tab counts as `TabSize` spaces. Counting stops at the first character
# that is neither a space nor a tab.
def number_of_leading_spaces(s)
  width = 0
  s.size.times do |i|
    case s[i, 1]
    when ' '  then width += 1
    when "\t" then width += TabSize
    else break
    end
  end
  width
end
# This returns the position of the first real char in a list item
#
# For example:
#   '*Hello'      # => 1
#   '* Hello'     # => 2
#   ' * Hello'    # => 3
#   '  *  Hello'  # => 5
#   '1.Hello'     # => 2
#   ' 1.  Hello'  # => 5
#
# The kind of list is taken from s.md_type (:ulist or :olist). For any other
# type the user is told about the problem and 0 is returned.
def spaces_before_first_char(s)
  # advances pos for as long as the character at pos matches pattern
  skip = lambda do |pos, pattern|
    pos += 1 while s[pos, 1] =~ pattern
    pos
  end

  case s.md_type
  when :ulist
    # whitespace if present, the indicator (+, -, *), optional whitespace
    after_indicator = skip.call(0, /\s/) + 1
    skip.call(after_indicator, /\s/)
  when :olist
    # whitespace, digits, the dot, whitespace
    after_digits = skip.call(skip.call(0, /\s/), /\d/)
    skip.call(after_digits + 1, /\s/)
  else
    tell_user "BUG (my bad): '#{s}' is not a list"
    0
  end
end
# Counts the number of leading '#' in the string.
# The last character of the string is never counted, so a string made only
# of hashes yields its length minus one.
# NOTE(review): presumably intentional so that a header always keeps at least
# one character of text; confirm with the callers.
def num_leading_hashes(s)
  count = 0
  count += 1 while count < (s.size - 1) && s[count, 1] == '#'
  count
end
# Strips initial and final hashes.
# Leading hashes are counted with num_leading_hashes; trailing '#' and
# whitespace are then dropped (the first remaining character is always kept)
# and the result is stripped.
def strip_hashes(s)
  body = s[num_leading_hashes(s), s.size]
  last = body.size - 1
  last -= 1 while last > 0 && (body[last, 1] =~ /(#|\s)/)
  body[0, last + 1].strip
end
# Removes the initial quote marker: a '>' at the start of a line together
# with at most one following whitespace character. The pattern is anchored
# with ^, so it applies at the start of every line contained in +s+.
def unquote(s)
s.gsub(/^>\s?/,'')
end
# Removes at most +n+ columns of leading indentation from +s+ and returns the
# rest of the string. A space counts as one column, a tab as TabSize columns;
# stripping stops at the first character that is neither.
def strip_indent(s, n)
  i = 0
  while i < s.size && n>0
    c = s[i,1]
    if c == ' '
      n-=1;
    elsif c == "\t"
      n-=TabSize;
    else
      break
    end
    i+=1
  end
  # Take everything from i to the end. The length used to be s.size-1, which
  # dropped the last character whenever nothing was stripped (i == 0).
  s[i, s.size]
end
# Debug helper: prints every element of +a+ on its own line as
# "<prefix> (<1-based position>)# <inspect>".
def dbg_describe_ary(a, prefix='')
  a.each_with_index do |item, index|
    puts "#{prefix} (#{index + 1})# #{item.inspect}"
  end
end
# Tells whether line +l+ asks for a forced line break, i.e. whether it
# matches the pattern below at the end of a line. Returns the match position
# (truthy) or nil.
# NOTE(review): as written the pattern matches a single trailing space;
# Markdown hard breaks conventionally need two - confirm the literal was not
# altered.
def force_linebreak?(l)
l =~ / $/
end
end
end

View file

@ -0,0 +1,165 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
# Adds attribute declarations that carry the intended class of the value.
class Module
# Type-checked variant: defines a reader for +symbol+ and a writer that
# raises (RuntimeError) when the assigned value is not a kind_of? +klass+.
# The writer is generated as source text and evaluated with module_eval, so
# +klass+ is interpolated by name into the generated code.
def safe_attr_accessor1(symbol, klass)
attr_reader symbol
code = <<-EOF
def #{symbol}=(val)
if not val.kind_of? #{klass}
s = "\nCould not assign an object of type \#{val.class} to #{symbol}.\n\n"
s += "Tried to assign object of class \#{val.class}:\n"+
"\#{val.inspect}\n"+
"to \#{self.class}::#{symbol} constrained to be of class #{klass}.\n"
raise s
end
@#{symbol} = val
end
EOF
module_eval code
end
# Unchecked variant: a plain attr_accessor; +klass+ is ignored.
def safe_attr_accessor2(symbol, klass)
attr_accessor symbol
end
# The unchecked variant is the one in effect.
alias safe_attr_accessor safe_attr_accessor2
end
module MaRuKu
# I did not want to have a class for each possible element.
# Instead I opted to have only the class "MDElement"
# that represents everything in the document (paragraphs, headers, etc).
#
# You can tell what it is by the variable `node_type`.
#
# In the instance-variable `children` there are the children. These
# can be of class 1) String or 2) MDElement.
#
# The @doc variable points to the document to which the MDElement
# belongs (which is an instance of Maruku, subclass of MDElement).
#
# Attributes are contained in the hash `attributes`.
# Keys are symbols (downcased, with spaces substituted by underscores)
#
# For example, if you write in the source document.
#
# Title: test document
# My property: value
#
# content content
#
# You can access `value` by writing:
#
# @doc.attributes[:my_property] # => 'value'
#
# from whichever MDElement in the hierarchy.
#
class MDElement
  # See helpers.rb for the list of allowed #node_type values
  safe_attr_accessor :node_type, Symbol

  # Children are either Strings or MDElement
  safe_attr_accessor :children, Array

  # An attribute list, may not be nil
  safe_attr_accessor :al, Array #Maruku::AttributeList

  # These are the processed attributes
  safe_attr_accessor :attributes, Hash

  # Reference of the document (which is of class Maruku)
  attr_accessor :doc

  # The meta hash this element was built with; compared by #==
  attr_accessor :meta_priv

  # Builds an element of the given type.
  #
  # Every key of +meta+ becomes a reader/writer pair defined on this one
  # object (generated as source text and instance_eval'ed) and is set to the
  # corresponding value. A nil +al+ is replaced by a fresh AttributeList.
  def initialize(node_type=:unset, children=[], meta={},
                 al=MaRuKu::AttributeList.new )
    super()
    self.children = children
    self.node_type = node_type
    @attributes = {}

    meta.each do |symbol, value|
      accessors = "\ndef #{symbol}; @#{symbol}; end\ndef #{symbol}=(val); @#{symbol}=val; end"
      self.instance_eval accessors
      self.send "#{symbol}=", value
    end

    self.al = al || AttributeList.new
    self.meta_priv = meta
  end

  # Two elements are equal when the other is an MDElement too and node type,
  # meta hash and children all compare equal.
  def ==(o)
    o.kind_of?(MDElement) &&
      (self.node_type == o.node_type) &&
      (self.meta_priv == o.meta_priv) &&
      (self.children == o.children)
  end
end
# This represents the whole document and holds global data.
class MDDocument
  safe_attr_accessor :refs, Hash
  safe_attr_accessor :footnotes, Hash

  # This is an hash. The key might be nil.
  safe_attr_accessor :abbreviations, Hash

  # Attribute lists definition
  safe_attr_accessor :ald, Hash

  # The order in which footnotes are used. Contains the id.
  safe_attr_accessor :footnotes_order, Array

  safe_attr_accessor :latex_required_packages, Array

  # Creates an empty document (node type :document) that is its own @doc,
  # with all tables empty. When source text +s+ is given it is handed to
  # parse_doc.
  def initialize(s=nil)
    super(:document)
    @doc = self

    # lookup tables
    self.refs = {}
    self.footnotes = {}
    self.abbreviations = {}
    self.ald = {}

    # ordered collections
    self.footnotes_order = []
    self.latex_required_packages = []

    parse_doc(s) if s
  end
end
end # MaRuKu

View file

@ -0,0 +1,87 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
class String
# Same signature as the other inspect_more methods; both arguments are
# ignored and the plain #inspect is returned.
def inspect_more(a=nil,b=nil)
inspect
end
end
class Object
# Fallback for any object: both arguments are ignored and the plain #inspect
# is returned.
def inspect_more(a=nil,b=nil)
inspect
end
end
class Array
  # Inspects an array of document children.
  #
  # Strings use #inspect, MaRuKu::MDElement children use inspect(compact);
  # anything else raises. The pieces are joined with +join_string+ and, unless
  # +add_brackets+ is false, wrapped in square brackets.
  def inspect_more(compact, join_string, add_brackets=true)
    pieces = map do |x|
      case x
      when String then x.inspect
      when MaRuKu::MDElement then x.inspect(compact)
      else raise "WTF #{x.class} #{x.inspect}"
      end
    end
    joined = pieces.join(join_string)
    add_brackets ? "[#{joined}]" : joined
  end
end
class Hash
  # Like #inspect, but with the entries ordered by the string form of their
  # keys and separated by a bare comma. Keys are converted to strings for
  # sorting and back to symbols for the lookup, so this is meant for hashes
  # with Symbol keys. Both arguments are ignored.
  def inspect_ordered(a=nil,b=nil)
    ordered_keys = keys.map { |key| key.to_s }.sort.map { |name| name.to_sym }
    entries = ordered_keys.map { |key| key.inspect + "=>" + self[key].inspect }
    "{" + entries.join(',') + "}"
  end
end
module MaRuKu
  class MDElement
    # Returns a textual representation of this element.
    #
    # When +compact+ is true and inspect2 returns something, that value is
    # used as is. Otherwise the generic "md_el(:type,children,meta,al)"
    # form is produced.
    def inspect(compact=true)
      if compact
        shortcut = inspect2
        return shortcut if shortcut
      end

      fields = [
        self.node_type,
        children_inspect(compact),
        @meta_priv.inspect_ordered,
        self.al.inspect
      ]
      "md_el(:%s,%s,%s,%s)" % fields
    end

    # Renders the children: on a single line when that line is shorter
    # than 70 characters, otherwise one child per line, passed through
    # add_tabs and enclosed in brackets.
    def children_inspect(compact=true)
      return "[]" if @children.empty?

      one_line = @children.inspect_more(compact,', ')
      return one_line if one_line.size < 70

      "[\n" + add_tabs(@children.inspect_more(compact,",\n",false)) + "\n]"
    end
  end
end

View file

@ -0,0 +1,61 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu
  class MDElement
    # Calls the block for every descendant element whose node_type equals
    # +e_node_type+, or for every descendant element when it is nil.
    # A parent is visited before its own children; String children are
    # skipped.
    def each_element(e_node_type=nil, &block)
      @children.each do |child|
        next unless child.kind_of? MDElement
        block.call child if !e_node_type || e_node_type == child.node_type
        child.each_element(e_node_type, &block)
      end
    end

    # Replaces every String in the hierarchy with the result of the block.
    # The block may return a single value or an array of values; every
    # returned value is spliced in at the position of the original string.
    def replace_each_string(&block)
      # Descendants first.
      @children.each do |child|
        child.replace_each_string(&block) if child.kind_of? MDElement
      end

      # Then rebuild our own list of children.
      rebuilt = []
      until @children.empty?
        item = @children.shift
        if item.kind_of? String
          rebuilt.concat([*block.call(item)])
        else
          rebuilt << item
        end
      end
      @children = rebuilt
    end
  end
end

View file

@ -0,0 +1,82 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'maruku'
#require 'bluecloth'
# The document to benchmark is read from standard input.
data = $stdin.read

# Number of iterations per phase: 10, unless the first command-line
# argument converts to a non-zero integer.
num = 10
unless ARGV.empty?
  requested = ARGV[0].to_i
  num = requested unless requested == 0
end

# Pairs of [class, rendering method] to measure.
methods = [
  [Maruku, :to_html],
# [BlueCloth, :to_html],
  [Maruku, :to_latex]
]
#methods = [[Maruku, :class]]
#num = 10

# For each pair, measure the average parsing and rendering time.
stats = methods.map do |klass, renderer|
  puts "Computing for #{klass}"

  parse_started = Time.now
  doc = nil
  1.upto(num) do |round|
    $stdout.write "#{round} "
    $stdout.flush
    doc = klass.new(data)
  end
  parsing = (Time.now-parse_started)/num

  # Rendering is measured on the last document parsed above.
  render_started = Time.now
  1.upto(num) do |round|
    $stdout.write "#{round} "
    $stdout.flush
    doc.send renderer
  end
  rendering = (Time.now-render_started)/num

  puts(("%s (%s): parsing %0.2f sec + rendering %0.2f sec "+
    "= %0.2f sec ") % [klass, renderer, parsing, rendering, parsing+rendering])

  [klass, renderer, parsing, rendering]
end

puts "\n\n\n"

# Append the total to each row, then list the slowest first; the last
# column is the ratio between the slowest total and the row's total.
stats.each { |row| row.push(row[2]+row[3]) }
max = stats.map { |row| row[4] }.max
stats.sort! { |x,y| x[4] <=> y[4] }.reverse!

stats.each do |klass, renderer, parsing, rendering, tot|
  puts(("%20s: parsing %0.2f sec + rendering %0.2f sec "+
    "= %0.2f sec (%0.2fx)") %
    ["#{klass} (#{renderer})", parsing, rendering, tot, max/tot])
end

View file

@ -0,0 +1,363 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'maruku'
require 'maruku/ext/math'
module MaRuKu; module Tests
# Five accented Italian vowels (a, e, i, o, u, each with a grave accent),
# encoded as UTF-8 (two bytes per letter)
AccIta8 = "\303\240\303\250\303\254\303\262\303\271"
# The same five letters, written in ISO-8859-1 (one byte per letter)
AccIta1 = "\340\350\354\362\371"
# The word MA-RU-KU, written in katakana and encoded as UTF-8
# (three bytes per character)
Maruku8 = "\343\203\236\343\203\253\343\202\257"
# Runs the span-level parser over a table of test cases and prints one
# status line per case.
#
# Each case is an array [input, expected, comment]:
# * +expected+ is either the list of parsed elements or :throw when the
#   parser must raise; when omitted, the value of the previous case is used.
# * +comment+ labels the case; when omitted, the previous comment is reused
#   with a counter appended.
#
# verbose::              also print the details of failing cases
# break_on_first_error:: stop at the first failing case; an unexpected
#                        exception is re-raised
# quiet::                do not print the successful cases
#
# NOTE(review): the options hash given as fourth element of the
# ISO-8859-1 case is not read by the loops below -- confirm whether the
# encoding was meant to be passed to the parser.
def test_span_parser(verbose, break_on_first_error, quiet)
  good_cases = [
    ["", [], 'Empty string gives empty list'],
    ["a", ["a"], 'Easy char'],
    [" a", ["a"], 'First space in the paragraph is ignored'],
    ["a\n \n", ["a"], 'Last spaces in the paragraphs are ignored'],
    [' ', [], 'One char => nothing'],
    [' ', [], 'Two chars => nothing'],
    ['a b', ['a b'], 'Spaces are compressed'],
    ['a b', ['a b'], 'Newlines are spaces'],
    ["a\nb", ['a b'], 'Newlines are spaces'],
    ["a\n b", ['a b'], 'Compress newlines 1'],
    ["a \nb", ['a b'], 'Compress newlines 2'],
    [" \nb", ['b'], 'Compress newlines 3'],
    ["\nb", ['b'], 'Compress newlines 4'],
    ["b\n", ['b'], 'Compress newlines 5'],
    ["\n", [], 'Compress newlines 6'],
    ["\n\n\n", [], 'Compress newlines 7'],
    [nil, :throw, "Should throw on nil input"],
    # Code blocks
    ["`" , :throw, 'Unclosed single ticks'],
    ["``" , :throw, 'Unclosed double ticks'],
    ["`a`" , [md_code('a')], 'Simple inline code'],
    ["`` ` ``" , [md_code('`')], ],
    ["`` \\` ``" , [md_code('\\`')], ],
    ["``a``" , [md_code('a')], ],
    ["`` a ``" , [md_code('a')], ],
    # Newlines
    ["a \n", ['a',md_el(:linebreak)], 'Two spaces give br.'],
    ["a \n", ['a'], 'Newlines 2'],
    [" \n", [md_el(:linebreak)], 'Newlines 3'],
    [" \n \n", [md_el(:linebreak),md_el(:linebreak)],'Newlines 3'],
    [" \na \n", [md_el(:linebreak),'a',md_el(:linebreak)],'Newlines 3'],
    # Inline HTML
    ["a < b", ['a < b'], '< can be on itself'],
    ["<hr>", [md_html('<hr />')], 'HR will be sanitized'],
    ["<hr/>", [md_html('<hr />')], 'Closed tag is ok'],
    ["<hr />", [md_html('<hr />')], 'Closed tag is ok 2'],
    ["<hr/>a", [md_html('<hr />'),'a'], 'Closed tag is ok 2'],
    ["<em></em>a", [md_html('<em></em>'),'a'], 'Inline HTML 1'],
    ["<em>e</em>a", [md_html('<em>e</em>'),'a'], 'Inline HTML 2'],
    ["a<em>e</em>b", ['a',md_html('<em>e</em>'),'b'], 'Inline HTML 3'],
    ["<em>e</em>a<em>f</em>",
      [md_html('<em>e</em>'),'a',md_html('<em>f</em>')],
      'Inline HTML 4'],
    ["<em>e</em><em>f</em>a",
      [md_html('<em>e</em>'),md_html('<em>f</em>'),'a'],
      'Inline HTML 5'],
    ["<img src='a' />", [md_html("<img src='a' />")], 'Attributes'],
    ["<img src='a'/>"],
    # emphasis
    ["**", :throw, 'Unclosed double **'],
    ["\\*", ['*'], 'Escaping of *'],
    ["a *b* ", ['a ', md_em('b')], 'Emphasis 1'],
    ["a *b*", ['a ', md_em('b')], 'Emphasis 2'],
    ["a * b", ['a * b'], 'Emphasis 3'],
    ["a * b*", :throw, 'Unclosed emphasis'],
    # same with underscore
    ["__", :throw, 'Unclosed double __'],
    ["\\_", ['_'], 'Escaping of _'],
    ["a _b_ ", ['a ', md_em('b')], 'Emphasis 4'],
    ["a _b_", ['a ', md_em('b')], 'Emphasis 5'],
    ["a _ b", ['a _ b'], 'Emphasis 6'],
    ["a _ b_", :throw, 'Unclosed emphasis'],
    ["_b_", [md_em('b')], 'Emphasis 7'],
    ["_b_ _c_", [md_em('b'),' ',md_em('c')], 'Emphasis 8'],
    ["_b__c_", [md_em('b'),md_em('c')], 'Emphasis 9'],
    # strong
    ["**a*", :throw, 'Unclosed double ** 2'],
    ["\\**a*", ['*', md_em('a')], 'Escaping of *'],
    ["a **b** ", ['a ', md_strong('b')], 'Emphasis 1'],
    ["a **b**", ['a ', md_strong('b')], 'Emphasis 2'],
    ["a ** b", ['a ** b'], 'Emphasis 3'],
    ["a ** b**", :throw, 'Unclosed emphasis'],
    ["**b****c**", [md_strong('b'),md_strong('c')], 'Emphasis 9'],
    # strong (with underscore)
    ["__a_", :throw, 'Unclosed double __ 2'],
    ["\\__a_", ['_', md_em('a')], 'Escaping of _'],
    ["a __b__ ", ['a ', md_strong('b')], 'Emphasis 1'],
    ["a __b__", ['a ', md_strong('b')], 'Emphasis 2'],
    ["a __ b", ['a __ b'], 'Emphasis 3'],
    ["a __ b__", :throw, 'Unclosed emphasis'],
    ["__b____c__", [md_strong('b'),md_strong('c')], 'Emphasis 9'],
    # extra strong
    ["***a**", :throw, 'Unclosed triple *** '],
    ["\\***a**", ['*', md_strong('a')], 'Escaping of *'],
    ["a ***b*** ", ['a ', md_emstrong('b')], 'Strong elements'],
    ["a ***b***", ['a ', md_emstrong('b')]],
    ["a *** b", ['a *** b']],
    ["a ** * b", ['a ** * b']],
    ["***b******c***", [md_emstrong('b'),md_emstrong('c')]],
    ["a *** b***", :throw, 'Unclosed emphasis'],
    # same with underscores
    ["___a__", :throw, 'Unclosed triple *** '],
    ["\\___a__", ['_', md_strong('a')], 'Escaping of *'],
    ["a ___b___ ", ['a ', md_emstrong('b')], 'Strong elements'],
    ["a ___b___", ['a ', md_emstrong('b')]],
    ["a ___ b", ['a ___ b']],
    ["a __ _ b", ['a __ _ b']],
    ["___b______c___", [md_emstrong('b'),md_emstrong('c')]],
    ["a ___ b___", :throw, 'Unclosed emphasis'],
    # mixing is bad
    ["*a_", :throw, 'Mixing is bad'],
    ["_a*", :throw],
    ["**a__", :throw],
    ["__a**", :throw],
    ["___a***", :throw],
    ["***a___", :throw],
    # links of the form [text][ref]
    ["\\[a]", ["[a]"], 'Escaping 1'],
    ["\\[a\\]", ["[a]"], 'Escaping 2'],
    # This is valid in the new Markdown version
    # ["[a]", ["a"], 'Not a link'],
    ["[a]", [ md_link(["a"],'a')], 'Empty link'],
    ["[a][]", ],
    ["[a][]b", [ md_link(["a"],'a'),'b'], 'Empty link'],
    ["[a\\]][]", [ md_link(["a]"],'a]')], 'Escape inside link'],
    ["[a", :throw, 'Link not closed'],
    ["[a][", :throw, 'Ref not closed'],
    # links of the form [text](url)
    ["\\[a](b)", ["[a](b)"], 'Links'],
    ["[a](url)c", [md_im_link(['a'],'url'),'c'], 'url'],
    ["[a]( url )c" ],
    ["[a] ( url )c" ],
    ["[a] ( url)c" ],
    ["[a](ur:/l/ 'Title')", [md_im_link(['a'],'ur:/l/','Title')],
      'url and title'],
    ["[a] ( ur:/l/ \"Title\")" ],
    ["[a] ( ur:/l/ \"Title\")" ],
    ["[a]( ur:/l/ Title)", :throw, "Must quote title" ],
    ["[a](url 'Tit\\\"l\\\\e')", [md_im_link(['a'],'url','Tit"l\\e')],
      'url and title escaped'],
    ["[a] ( url \"Tit\\\"l\\\\e\")" ],
    ["[a] ( url \"Tit\\\"l\\\\e\" )" ],
    ['[a] ( url "Tit\\"l\\\\e" )' ],
    ["[a]()", [md_im_link(['a'],'')], 'No URL is OK'],
    ["[a](\"Title\")", :throw, "No url specified" ],
    ["[a](url \"Title)", :throw, "Unclosed quotes" ],
    ["[a](url \"Title\\\")", :throw],
    ["[a](url \"Title\" ", :throw],
    ["[a](url \'Title\")", :throw, "Mixing is bad" ],
    ["[a](url \"Title\')"],
    ["[a](/url)", [md_im_link(['a'],'/url')], 'Funny chars in url'],
    ["[a](#url)", [md_im_link(['a'],'#url')]],
    ["[a](</script?foo=1&bar=2>)", [md_im_link(['a'],'/script?foo=1&bar=2')]],
    # Images
    ["\\![a](url)", ['!', md_im_link(['a'],'url') ], 'Escaping images'],
    ["![a](url)", [md_im_image(['a'],'url')], 'Image no title'],
    ["![a]( url )" ],
    ["![a] ( url )" ],
    ["![a] ( url)" ],
    ["![a](url 'ti\"tle')", [md_im_image(['a'],'url','ti"tle')], 'Image with title'],
    ['![a]( url "ti\\"tle")' ],
    ["![a](url", :throw, 'Invalid images'],
    ["![a( url )" ],
    ["![a] ('url )" ],
    ["![a][imref]", [md_image(['a'],'imref')], 'Image with ref'],
    ["![a][ imref]"],
    ["![a][ imref ]"],
    ["![a][\timref\t]"],
    ['<http://example.com/?foo=1&bar=2>',
      [md_url('http://example.com/?foo=1&bar=2')], 'Immediate link'],
    ['a<http://example.com/?foo=1&bar=2>b',
      ['a',md_url('http://example.com/?foo=1&bar=2'),'b'] ],
    ['<andrea@censi.org>',
      [md_email('andrea@censi.org')], 'Email address'],
    ['<mailto:andrea@censi.org>'],
    ["Developmen <http://rubyforge.org/projects/maruku/>",
      ["Developmen ", md_url("http://rubyforge.org/projects/maruku/")]],
    ["a<!-- -->b", ['a',md_html('<!-- -->'),'b'],
      'HTML Comment'],
    ["a<!--", :throw, 'Bad HTML Comment'],
    ["a<!-- ", :throw, 'Bad HTML Comment'],
    ["<?xml <?!--!`3 ?>", [md_xml_instr('xml','<?!--!`3')], 'XML processing instruction'],
    ["<? <?!--!`3 ?>", [md_xml_instr('','<?!--!`3')] ],
    ["<? ", :throw, 'Bad Server directive'],
    ["a <b", :throw, 'Bad HTML 1'],
    ["<b", :throw, 'Bad HTML 2'],
    ["<b!", :throw, 'Bad HTML 3'],
    ['`<div>`, `<table>`, `<pre>`, `<p>`',
      [md_code('<div>'),', ',md_code('<table>'),', ',
      md_code('<pre>'),', ',md_code('<p>')],
      'Multiple HTLM tags'],
    ["&andrea", ["&andrea"], 'Parsing of entities'],
    # no escaping is allowed
    # ["\\&andrea;", ["&andrea;"]],
    ["l&andrea;", ["l", md_entity('andrea')] ],
    ["&&andrea;", ["&", md_entity('andrea')] ],
    ["&123;;&amp;",[md_entity('123'),';',md_entity('amp')]],
    ["a\nThe [syntax page] [s] provides",
      ['a The ', md_link(['syntax page'],'s'), ' provides'], 'Regression'],
    ['![a](url "ti"tle")', [md_im_image(['a'],'url','ti"tle')],
      "Image with quotes"],
    ['![a](url \'ti"tle\')' ],
    ['[bar](/url/ "Title with "quotes" inside")',
      [md_im_link(["bar"],'/url/', 'Title with "quotes" inside')],
      "Link with quotes"],
    # We dropped this idea
    # ['$20,000 and $30,000', ['$20,000 and $30,000'], 'Math: spaces'],
    ['$20,000$', [md_inline_math('20,000')]],
    # ['$ 20,000$', ['$ 20,000$']],
    # ['$20,000 $ $20,000$', ['$20,000 $ ', md_inline_math('20,000')]],
    ["#{Maruku8}", [Maruku8], "Reading UTF-8"],
    ["#{AccIta1}", [AccIta8], "Converting ISO-8859-1 to UTF-8",
      {:encoding => 'iso-8859-1'}],
  ]
  good_cases = unit_tests_for_attribute_lists + good_cases

  # Fill in the omitted fields: a missing expected value is inherited from
  # the previous case, a missing comment becomes "<previous comment> <n>".
  count = 1; last_comment=""; last_expected=:throw
  good_cases.each do |t|
    if not t[1]
      t[1] = last_expected
    else
      last_expected = t[1]
    end
    if not t[2]
      t[2] = last_comment + " #{count+=1}"
    else
      last_comment = t[2]; count=1
    end
  end

  # print_status reads @verbose to decide whether to print the details.
  @verbose = verbose
  m = Maruku.new
  m.attributes[:on_error] = :raise
  Globals[:debug_keep_ials] = true

  good_cases.each do |input, expected, comment|
    output = nil
    begin
      output = m.parse_span_better(input)
      #lines = Maruku.split_lines input
      #output = m.parse_lines_as_span(lines)
    rescue Exception => e
      if not expected == :throw
        ex = e.inspect+ "\n"+ e.backtrace.join("\n")
        s = comment+describe_difference(input, expected, output)
        print_status(comment,'CRASHED :-(', ex+s)
        # Use the method argument here: the instance variable of the same
        # name is never assigned, so testing it could never re-raise.
        raise e if break_on_first_error
      else
        quiet || print_status(comment,'OK')
      end
    end

    if not expected == :throw
      if not (expected == output)
        s = comment+describe_difference(input, expected, output)
        print_status(comment, 'FAILED', s)
        break if break_on_first_error
      else
        quiet || print_status(comment, 'OK')
      end
    else # I expected a raise
      if output
        s = comment+describe_difference(input, expected, output)
        print_status(comment, 'FAILED (no throw)', s)
        break if break_on_first_error
      end
    end
  end
end
# Width to which the comment column of a status line is padded.
PAD=40

# Prints one "- comment status" line, with the comment padded with spaces
# up to PAD characters. When @verbose is set and +verbose_text+ is given,
# the text is printed on the following lines.
def print_status(comment, status, verbose_text=nil)
  padded = comment.ljust(PAD)
  puts "- #{padded} #{status}"
  puts verbose_text if @verbose && verbose_text
end
# Builds the report shown for a failing case: the inspected input, the
# expected value and the actual output, each under its own heading.
def describe_difference(input, expected, output)
  [
    "\nInput:\n #{input.inspect}",
    "\nExpected:\n #{expected.inspect}",
    "\nOutput:\n #{output.inspect}\n"
  ].join
end
end end
# Make the span-parser tests available on Maruku instances.
class Maruku
  include MaRuKu::Tests
end

# Command-line flags: 'v' = verbose, 'b' = break on first error,
# 'q' = quiet.
Maruku.new.test_span_parser(
  ARGV.include?('v'),
  ARGV.include?('b'),
  ARGV.include?('q'))

View file

@ -0,0 +1,136 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
require 'maruku'
class Maruku
  # Aborts the current test by raising a RuntimeError that describes the
  # failure.
  #
  # test:: the Markdown source of the failing test
  # doc::  the document parsed from it; both its inspect form and its
  #        HTML rendering are included in the message
  # s::    a short description of what went wrong
  def Maruku.failed(test, doc, s)
    raise "Test failed: #{s}\n*****\n#{test}\n*****\n"+
      "#{doc.inspect}\n*****\n#{doc.to_html}"
  end

  # Parses every document in MetaTests (they are separated by '***') and
  # checks that each one consists of exactly one child, which must be a
  # header. Calls Maruku.failed, which raises, on the first violation.
  def Maruku.metaTests
    # The attributes described by the test documents.
    # NOTE(review): this value is not compared with anything yet.
    ref = {:id => 'id1', :class => ['class1','class2'],
      :style=> 'Style is : important = for all } things'}

    MetaTests.split('***').each do |test|
      #puts "Test: #{test.inspect}"
      doc = Maruku.new(test)

      failed(test, doc, "children != 1") unless doc.children.size == 1

      header = doc.children[0]
      failed(test, doc, "child not header") unless header.node_type==:header

      # puts doc.inspect
      # puts doc.to_html
    end
  end

  # Test documents for attribute lists, separated by '***'. Each one is a
  # single header carrying attributes written in a different syntax.
  MetaTests = <<EOF
# Head # {ref1 ref2 ref3}
{ref1}: id: id1; class: class1
{ref2}: class: class2
{ref3}: style: "Style is : important = for all } things"
***
# Head # {ref1 ref3 ref2}
{ref1}: id: id1; class: class1
{ref2}: class: class2
{ref3}: style: "Style is : important = for all } things"
***
# Head # {ref1 ref2 ref3}
{ref1}: id= id1; class=class1
{ref2}: class=class2
{ref3}: style="Style is : important = for all } things"
***
# Head # {ref1 ref2 ref3}
{ref1}: id=id1 class=class1
{ref2}: class=class2
{ref3}: style="Style is : important = for all } things"
***
# Head # {ref1 ref2 ref3}
{ref1}: id:id1 class:class1
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"
***
# Head # {ref1 ref2 ref3}
{ref1}: id:id1 class:class1
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"
***
# Head # {#id1 .class1 ref2 ref3}
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"
***
# Head # { #id1 .class1 ref2 ref3 }
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"
***
# Head # { id=id1 class=class1 ref2 ref3 }
{ref2}: class : class2
{ref3}: style = "Style is : important = for all } things"
***
# Head # { id:id1 class="class1" class:"class2" style="Style is : important = for all } things"}
EOF
end

# Run the tests only when this file is executed directly as tests.rb.
if File.basename($0) == 'tests.rb'
  Maruku.metaTests
end

199
vendor/plugins/maruku/lib/maruku/toc.rb vendored Normal file
View file

@ -0,0 +1,199 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu
class MDDocument
  # The table of contents of the document: an instance of Section.
  attr_reader :toc
  attr_writer :toc
end
# This represents a section in the TOC.
class Section
  # section_level::      a Fixnum, is == header_element.level
  # section_number::     an array of Fixnum, like [1,2,5] for Section 1.2.5
  # header_element::     reference to the header (header has
  #                      h.meta[:section] to self)
  # immediate_children:: Array of immediate children of this element
  # section_children::   Array of the Section objects inside this section
  attr_accessor :section_level, :section_number, :header_element,
    :immediate_children, :section_children

  # A new section has no children of either kind.
  def initialize
    @section_children = []
    @immediate_children = []
  end
end
class Section
# Returns a multi-line description of this section and, recursively, of
# its subsections. A section with a header gets a line made of +indent+
# underscores, its level, its number, the header text and the header id;
# a section without a header is printed as "Master".
def inspect(indent=1)
  heading =
    if @header_element
      "\_"*indent + "(#{@section_level})>\t #{@section_number.join('.')} : " +
        @header_element.children_to_s +
        " (id: '#{@header_element.attributes[:id]}')\n"
    else
      "Master\n"
    end

  @section_children.inject(heading) do |text, child|
    text + child.inspect(indent+1)
  end
end
# Assigns +a+ as the number of this section and numbers each subsection
# with +a+ followed by its 1-based position. When the section has a
# header, the number is also stored in the header's :section_number
# attribute.
def numerate(a=[])
  self.section_number = a

  section_children.each_with_index do |child, index|
    child.numerate(a.clone.push(index + 1))
  end

  header = self.header_element
  header.attributes[:section_number] = self.section_number if header
end
include REXML
# Creates the HTML table of contents: a div of class 'maruku_toc' that
# contains the list built by create_toc.
# Call this on the root
def to_html
  wrapper = Element.new 'div'
  wrapper.attributes['class'] = 'maruku_toc'
  wrapper << create_toc
  wrapper
end
# Builds the 'ul' element listing the subsections of this section. Each
# item holds the rendered section number (when there is one), a link to
# the header id and, for sections that have subsections, a nested list.
def create_toc
  list = Element.new 'ul'
  # let's remove the bullets
  list.attributes['style'] = 'list-style: none;'

  @section_children.each do |section|
    header = section.header_element
    item = Element.new 'li'

    number_span = header.render_section_number
    item << number_span if number_span

    # The link reuses the header content, but must not repeat its id.
    link = header.wrap_as_element('a')
    link.delete_attribute 'id'
    link.attributes['href'] = "##{header.attributes[:id]}"
    item << link

    item << section.create_toc if section.section_children.size>0
    list << item
  end

  list
end
# Creates the LaTeX table of contents, followed by an empty line.
# Call this on the root
def to_latex
  "#{to_latex_rec}\n\n"
end

# Returns one LaTeX line per subsection, depth first: the section number
# (when the header has one), a hyperlink to the header id and a dotted
# page reference.
def to_latex_rec
  @section_children.inject("") do |latex, section|
    header = section.header_element

    entry = "\\noindent"
    number = header.section_number
    entry += number if number

    text = header.children_to_latex
    id = header.attributes[:id]
    entry += "\\hyperlink{#{id}}{#{text}}"
    entry += "\\dotfill \\pageref*{#{id}} \\linebreak\n"
    entry += section.to_latex_rec if section.section_children.size>0

    latex + entry
  end
end
end
class MDDocument
  # Builds the tree of Section objects for this document from its
  # top-level children, numbers the sections and returns the root of the
  # tree. Headers without an id get one from generate_id first.
  def create_toc
    each_element(:header) do |hdr|
      hdr.attributes[:id] ||= hdr.generate_id
    end

    # the ancestor section
    root = Section.new
    root.section_level = 0

    # Sections that are currently open, innermost last.
    open_sections = [root]

    pos = 0
    while pos < @children.size
      # Attach elements to the innermost section until a header is found
      # whose level is at most one below that section. Headers nested
      # deeper than that are attached like ordinary elements.
      while pos < @children.size
        node = @children[pos]
        if node.node_type == :header
          break if node.level <= open_sections.last.section_level+1
        end
        open_sections.last.immediate_children.push node
        pos += 1
      end
      break if pos >= @children.size

      header = @children[pos]
      level = header.level
      if level > open_sections.last.section_level
        # this level is inside
        nested = Section.new
        nested.section_level = level
        nested.header_element = header
        header.instance_variable_set :@section, nested
        open_sections.last.section_children.push nested
        open_sections.push nested
        pos += 1
      else
        # this level is a sibling or a parent: close the innermost
        # section and look at the same header again
        open_sections.pop
      end
    end

    # If there is only one big header, then assume
    # it is the master
    toc = root.section_children.size == 1 ? root.section_children.first : root

    # Assign section numbers
    toc.numerate
    toc
  end
end
end

View file

@ -0,0 +1,33 @@
require 'maruku'
# A well-formed document.
text = <<EOF
Chapter 1
=========
It was a stormy and rainy night.
EOF

# A document with an unterminated link.
invalid = <<EOF
This is a [bad link.
EOF

Maruku.new(text).to_html

# Error messages are collected in this string.
error_log = ""

# With :on_error => :raise the invalid document must raise.
begin
  Maruku.new(invalid, :on_error => :raise, :error_stream => error_log)
  puts "Error! It should have thrown an exception."
rescue
  # puts "ok, got error"
end

# With :on_error => :warning it must not.
begin
  Maruku.new(invalid, :on_error => :warning, :error_stream => error_log)
rescue
  puts "Error! It should not have thrown an exception."
end

View file

@ -0,0 +1,38 @@
#--
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
#
# This file is part of Maruku.
#
# Maruku is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Maruku is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Maruku; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#++
module MaRuKu
  # Version of the library and address of its home page.
  Version = '0.5.2'
  MarukuURL = 'http://maruku.rubyforge.org/'

  # If true, use also PHP Markdown extra syntax
  #
  # Note: it is not guaranteed that if it's false
  # then no special features will be used.
  def markdown_extra?; true; end

  # Always true.
  def new_meta_data?; true; end
end

38
vendor/plugins/syntax/lib/syntax.rb vendored Normal file
View file

@ -0,0 +1,38 @@
require 'syntax/common'
module Syntax

  # The tokenizer used for syntaxes that have no implementation of their
  # own: the whole text is yielded as one token of the :normal group.
  class Default
    # Yield the given text as a single token.
    def tokenize( text )
      yield Token.new( text, :normal )
    end
  end

  # Registry of the syntax implementations. Unknown names map to Default.
  SYNTAX = Hash.new( Default )

  # Every method defined below is available both as Syntax.name and as a
  # private method of the classes that include Syntax.
  module_function

  # Returns a new tokenizer for the requested syntax. Its implementation
  # is required from syntax/lang/; when that file cannot be loaded, the
  # registry default (Default) is instantiated instead.
  def load( syntax )
    begin
      require "syntax/lang/#{syntax}"
    rescue LoadError
      # No implementation for this syntax: use the registry default.
    end

    SYNTAX[ syntax ].new
  end

  # Return an array of the names of supported syntaxes, one for each .rb
  # file in the syntax/lang directory next to this file.
  def all
    pattern = File.join(File.dirname(__FILE__), "syntax", "lang", "*.rb")
    Dir.glob(pattern).map { |path| File.basename(path, ".rb") }
  end
end

View file

@ -0,0 +1,163 @@
require 'strscan'
module Syntax
# One lexeme produced by a tokenizer. A token is the text of the lexeme
# itself (it is a String), tagged with the group the lexeme belongs to.
class Token < String

  # group::       the type of the lexeme that was extracted
  # instruction:: the instruction associated with this token (:none,
  #               :region_open, or :region_close)
  attr_reader :group, :instruction

  # Create a new Token representing the given text, and belonging to the
  # given group.
  def initialize( text, group, instruction = :none )
    super( text )
    @instruction = instruction
    @group = group
  end
end
# The base class of all tokenizers. It owns the scanner, drives the loop
# that calls #step until the text is exhausted, and merges adjacent pieces
# of text of the same group into a single token.
class Tokenizer

  # group:: the current group being processed by the tokenizer
  # chunk:: the current chunk of text being accumulated
  attr_reader :group, :chunk

  # Start tokenizing: reset the state, create a scanner for the text and
  # remember the block, which will be invoked for each token extracted.
  def start( text, &block )
    @chunk = ""
    @group = :normal
    @callback = block
    @text = StringScanner.new( text )
    setup
  end

  # Hook for implementation-specific setup logic. Does nothing here.
  def setup
  end

  # Finish tokenizing: yield whatever text is still buffered to the
  # client, then run the teardown hook.
  def finish
    start_group nil
    teardown
  end

  # Hook for implementation-specific teardown logic. Does nothing here.
  def teardown
  end

  # Called once per iteration of the tokenization loop; it may extract
  # several tokens. Subclasses must implement it.
  def step
    raise NotImplementedError, "subclasses must implement #step"
  end

  # Tokenize the given text, calling #step until the text has been
  # exhausted.
  def tokenize( text, &block )
    start text, &block
    step until @text.eos?
    finish
  end

  # Merge the given tokenizer-specific options into the ones already set.
  # Which options exist, if any, depends on the tokenizer.
  def set( opts={} )
    @options ||= Hash.new
    @options.update opts
  end

  # Get the value of the specified option (nil when no option was set).
  def option(opt)
    return nil unless @options
    @options[opt]
  end

  private

  # Matches (without consuming anything) right before a line break or at
  # the end of a line.
  EOL = /(?=\r\n?|\n|$)/

  # A convenience for delegating method calls to the scanner.
  def self.delegate( sym )
    define_method( sym ) { |*a| @text.__send__( sym, *a ) }
  end

  [ :bol?, :eos?, :scan, :scan_until, :check, :check_until, :getch,
    :matched, :pre_match, :peek, :pos ].each { |name| delegate name }

  # Access the n-th subgroup from the most recent match.
  def subgroup(n)
    @text[n]
  end

  # Append the given data to the currently active chunk.
  def append( data )
    @chunk << data
  end

  # Switch to the group +gr+. When it differs from the current group, the
  # text accumulated so far (if any) is first yielded to the client as a
  # token and the chunk is cleared.
  #
  # After the switch, +data+ is appended to the chunk unless it is nil.
  def start_group( gr, data=nil )
    flush_chunk unless gr == @group
    @group = gr
    @chunk << data if data
  end

  # Flush the pending chunk and yield a :region_open token for the group.
  def start_region( gr, data=nil )
    emit_region( gr, data, :region_open )
  end

  # Flush the pending chunk and yield a :region_close token for the group.
  def end_region( gr, data=nil )
    emit_region( gr, data, :region_close )
  end

  # Shared implementation of start_region and end_region.
  def emit_region( gr, data, instruction )
    flush_chunk
    @group = gr
    @callback.call( Token.new( data||"", @group, instruction ) )
  end

  # Yield the accumulated chunk (unless it is empty) as a token of the
  # current group, and start a new chunk.
  def flush_chunk
    @callback.call( Token.new( @chunk, @group ) ) unless @chunk.empty?
    @chunk = ""
  end

  # Tokenize +text+ with the tokenizer of another syntax, passing the
  # tokens to the same callback and sharing the options of this tokenizer.
  def subtokenize( syntax, text )
    tokenizer = Syntax.load( syntax )
    tokenizer.set @options if @options
    flush_chunk
    tokenizer.tokenize( text, &@callback )
  end
end
end

View file

@ -0,0 +1,27 @@
require 'syntax'
module Syntax
  module Convertors

    # The common ancestor of all convertors: it stores the tokenizer and
    # provides a shortcut for building a convertor from a syntax name.
    class Abstract

      # A reference to the tokenizer used by this convertor.
      attr_reader :tokenizer

      class << self
        # Instantiates a convertor that uses the tokenizer returned by
        # Syntax.load for the given syntax.
        def for_syntax( syntax )
          new( Syntax.load( syntax ) )
        end
      end

      # Creates a new convertor that uses the given tokenizer.
      def initialize( tokenizer )
        @tokenizer = tokenizer
      end
    end
  end
end

View file

@ -0,0 +1,51 @@
require 'syntax/convertors/abstract'
module Syntax
module Convertors
# A simple class for converting a text into HTML.
class HTML < Abstract
# Converts the given text to HTML. Tokens are wrapped in a span whose
# class is the name of their group, except when the group is the one of
# the innermost open region (or <tt>:normal</tt> outside of any region).
# A region-opening token starts a span that stays open until the matching
# region-closing token; regions still open at the end are closed. If
# +pre+ is +true+, the html is automatically wrapped in pre tags.
def convert( text, pre=true )
  html = ""
  html << "<pre>" if pre

  open_regions = []
  @tokenizer.tokenize( text ) do |tok|
    escaped = html_escape(tok)

    if tok.instruction == :region_close
      open_regions.pop
      html << "</span>"
    elsif tok.instruction == :region_open
      open_regions.push tok.group
      html << "<span class=\"#{tok.group}\">#{escaped}"
    elsif tok.group == ( open_regions.last || :normal )
      html << escaped
    else
      html << "<span class=\"#{tok.group}\">#{escaped}</span>"
    end
  end

  html << "</span>" while open_regions.pop
  html << "</pre>" if pre
  html
end
private
# Replaces the characters &, <, > and " with the corresponding HTML
# entities.
def html_escape( string )
  string.gsub( /[&<>"]/ ) do |char|
    case char
    when "&" then "&amp;"
    when "<" then "&lt;"
    when ">" then "&gt;"
    else "&quot;"
    end
  end
end
end
end
end

View file

@ -0,0 +1,317 @@
require 'syntax'
module Syntax
# A tokenizer for the Ruby language. It recognizes all common syntax
# (and some less common syntax) but because it is not a true lexer, it
# will make mistakes on some ambiguous cases.
class Ruby < Tokenizer
# The identifiers that are highlighted as keywords.
KEYWORDS = %w[
  if then elsif else end begin do rescue ensure while for
  class module def yield raise until unless and or not when
  case super undef break next redo retry in return alias
  defined?
]
# Perform ruby-specific setup: clear both flags and the list of pending
# heredocs.
def setup
  @selector = @allow_operator = false
  @heredocs = []
end
# Step through a single iteration of the tokenization process.
#
# Each call inspects the text at the current scan position and emits one or
# more groups through start_group/append. Constructs are tried in this
# order: whole-line constructs (=begin blocks, everything after __END__),
# multi-character constructs (def/class/module headers, symbols, character
# literals, pseudo-constants, prefixed integers), two-character prefixes
# (%-literals and heredoc openers) and finally a dispatch on the single
# next character.
#
# State read and updated along the way:
# * @selector       - true right after a single "."; the next word is then
#                     emitted as :ident even if it is a keyword or constant
# * @allow_operator - when true a "/" is emitted as punctuation, otherwise
#                     it opens a regular expression
# * @heredocs       - heredocs announced on this line; each is scanned when
#                     the following line break is reached
def step
  case
  when bol? && check( /=begin/ )
    # scan_until returns nil (without advancing) when no "=end" follows;
    # fall back to the rest of the input so an unterminated block comment
    # cannot leave the scanner stuck at the same position forever.
    start_group( :comment,
      scan_until( /^=end#{EOL}/ ) || scan_until( /\Z/ ) )
  when bol? && check( /__END__#{EOL}/ )
    start_group( :comment, scan_until( /\Z/ ) )
  else
    case
    when check( /def\s+/ )
      start_group :keyword, scan( /def\s+/ )
      start_group :method,  scan_until( /(?=[;(\s]|#{EOL})/ )
    when check( /class\s+/ )
      start_group :keyword, scan( /class\s+/ )
      start_group :class,   scan_until( /(?=[;\s<]|#{EOL})/ )
    when check( /module\s+/ )
      start_group :keyword, scan( /module\s+/ )
      start_group :module,  scan_until( /(?=[;\s]|#{EOL})/ )
    when check( /::/ )
      start_group :punct, scan(/::/)
    when check( /:"/ )
      # :"..." symbol; the body is scanned as an interpolated region
      start_group :symbol, scan(/:/)
      scan_delimited_region :symbol, :symbol, "", true
      @allow_operator = true
    when check( /:'/ )
      # :'...' symbol; no interpolation
      start_group :symbol, scan(/:/)
      scan_delimited_region :symbol, :symbol, "", false
      @allow_operator = true
    when scan( /:[_a-zA-Z@$][$@\w]*[=!?]?/ )
      start_group :symbol, matched
      @allow_operator = true
    when scan( /\?(\\[^\n\r]|[^\\\n\r\s])/ )
      start_group :char, matched
      @allow_operator = true
    when check( /(__FILE__|__LINE__|true|false|nil|self)[?!]?/ )
      # After a "." or with a trailing ?/! these are ordinary method names.
      if @selector || matched[-1] == ?? || matched[-1] == ?!
        start_group :ident,
          scan(/(__FILE__|__LINE__|true|false|nil|self)[?!]?/)
      else
        start_group :constant,
          scan(/(__FILE__|__LINE__|true|false|nil|self)/)
      end
      @selector = false
      @allow_operator = true
    when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
      start_group :number, matched
      @allow_operator = true
    else
      case peek(2)
      when "%r"
        scan_delimited_region :punct, :regex, scan( /../ ), true
        @allow_operator = true
      when "%w", "%q"
        scan_delimited_region :punct, :string, scan( /../ ), false
        @allow_operator = true
      when "%s"
        scan_delimited_region :punct, :symbol, scan( /../ ), false
        @allow_operator = true
      when "%W", "%Q", "%x"
        scan_delimited_region :punct, :string, scan( /../ ), true
        @allow_operator = true
      when /%[^\sa-zA-Z0-9]/
        scan_delimited_region :punct, :string, scan( /./ ), true
        @allow_operator = true
      when "<<"
        # "<<" directly after a word character is treated as the operator,
        # otherwise as the start of a heredoc.
        saw_word = ( chunk[-1,1] =~ /[\w!?]/ )
        start_group :punct, scan( /<</ )
        if saw_word
          @allow_operator = false
          return
        end

        float_right = scan( /-/ )
        append "-" if float_right
        if ( type = scan( /['"]/ ) )
          append type
          delim = scan_until( /(?=#{type})/ )
          if delim.nil?
            # unterminated quoted heredoc tag: consume the remaining text
            append scan_until( /\Z/ )
            return
          end
        else
          delim = scan( /\w+/ ) or return
        end
        start_group :constant, delim
        start_group :punct, scan( /#{type}/ ) if type
        # The heredoc body is scanned later, when the line break is reached.
        @heredocs << [ float_right, type, delim ]
        @allow_operator = true
      else
        case peek(1)
        when /[\n\r]/
          unless @heredocs.empty?
            scan_heredoc(*@heredocs.shift)
          else
            start_group :normal, scan( /\s+/ )
          end
          @allow_operator = false
        when /\s/
          start_group :normal, scan( /\s+/ )
        when "#"
          start_group :comment, scan( /#[^\n\r]*/ )
        when /[A-Z]/
          start_group @selector ? :ident : :constant, scan( /\w+/ )
          @allow_operator = true
        when /[a-z_]/
          word = scan( /\w+[?!]?/ )
          if !@selector && KEYWORDS.include?( word )
            start_group :keyword, word
            @allow_operator = false
          else
            # This used to be a bare "elsif", which made the start_group
            # call the branch condition and @allow_operator dependent on
            # that call's return value.
            start_group :ident, word
            @allow_operator = true
          end
          @selector = false
        when /\d/
          start_group :number,
            scan( /[\d_]+(\.[\d_]+)?([eE][\d_]+)?/ )
          @allow_operator = true
        when '"'
          scan_delimited_region :punct, :string, "", true
          @allow_operator = true
        when '/'
          if @allow_operator
            start_group :punct, scan(%r{/})
            @allow_operator = false
          else
            scan_delimited_region :punct, :regex, "", true
            @allow_operator = true
          end
        when "'"
          scan_delimited_region :punct, :string, "", false
          @allow_operator = true
        when "."
          dots = scan( /\.{1,3}/ )
          start_group :punct, dots
          # only a single dot is a method selector; ".." and "..." are not
          @selector = ( dots.length == 1 )
        when /[@]/
          start_group :attribute, scan( /@{1,2}\w*/ )
          @allow_operator = true
        when /[$]/
          start_group :global, scan(/\$/)
          start_group :global, scan( /\w+|./ ) if check(/./)
          @allow_operator = true
        when /[-!?*\/+=<>(\[\{}:;,&|%]/
          start_group :punct, scan(/./)
          @allow_operator = false
        when /[)\]]/
          start_group :punct, scan(/./)
          @allow_operator = true
        else
          # all else just falls through this, to prevent
          # infinite loops...
          append getch
        end
      end
    end
  end
end
private
# Scan a delimited region of text. This handles the simple cases (strings
# delimited with quotes) as well as the more complex cases of %-strings
# and here-documents.
#
# * +delim_group+ is the group to use to classify the delimiters of the
#   region
# * +inner_group+ is the group to use to classify the contents of the
#   region
# * +starter+ is the text to use as the starting delimiter
# * +exprs+ is a boolean flag indicating whether the region is an
#   interpolated string or not
# * +delim+ is the text to use as the delimiter of the region. If +nil+,
#   the next character will be treated as the delimiter.
# * +heredoc+ is either +false+, meaning the region is not a heredoc, or
#   <tt>:flush</tt> (meaning the delimiter must be flushed left), or
#   <tt>:float</tt> (meaning the delimiter doesn't have to be flush left).
#
# scan and getch return nil when nothing can be read (end of input, or a
# line break where a character is required); such nils are never handed on
# to append or Regexp.escape.
def scan_delimited_region( delim_group, inner_group, starter, exprs,
                           delim=nil, heredoc=false )
  if !delim
    start_group delim_group, starter
    delim = scan( /./ )
    # Nothing usable follows the starter (end of input or a line break):
    # the starter has been emitted, there is no region to scan.
    return if delim.nil?
    append delim
    # bracket-like openers are closed by their counterpart
    delim = case delim
      when '{' then '}'
      when '(' then ')'
      when '[' then ']'
      when '<' then '>'
      else delim
    end
  end

  start_region inner_group

  # Build the pattern of everything that interrupts the plain contents:
  # a backslash, the closing delimiter and (optionally) an interpolation.
  items = "\\\\|".dup
  if heredoc
    items << "(^"
    items << '\s*' if heredoc == :float
    # NOTE: inside this double-quoted string "\s" is a literal space, so
    # only spaces are tolerated after the terminator.
    items << "#{Regexp.escape(delim)}\s*?)#{EOL}"
  else
    items << "#{Regexp.escape(delim)}"
  end
  items << "|#(\\$|@@?|\\{)" if exprs
  items = Regexp.new( items )

  loop do
    start = pos
    match = scan_until( items )
    if match.nil?
      # unterminated region: everything that is left belongs to it
      start_group inner_group, scan_until( /\Z/ )
      break
    else
      text = pre_match[start..-1]
      start_group inner_group, text if text.length > 0
      case matched.strip
      when "\\"
        unless exprs
          # non-interpolated regions only know \' and \\ as escapes
          case peek(1)
          when "'"
            scan(/./)
            start_group :escape, "\\'"
          when "\\"
            scan(/./)
            start_group :escape, "\\\\"
          else
            start_group inner_group, "\\"
          end
        else
          start_group :escape, "\\"
          c = getch
          append c if c
          case c
          when 'x'
            digits = scan( /[a-fA-F0-9]{1,2}/ )
            append digits if digits
          when /[0-7]/
            append scan( /[0-7]{0,2}/ )
          end
        end
      when delim
        end_region inner_group
        start_group delim_group, matched
        break
      when /^#/
        do_highlight = (option(:expressions) == :highlight)
        start_region :expr if do_highlight
        start_group :expr, matched
        case matched[1]
        when ?{
          # collect the text up to the brace that balances the opening "#{"
          depth = 1
          content = "".dup
          while depth > 0
            from = pos
            c = scan_until( /[\{}]/ )
            if c.nil?
              content << scan_until( /\Z/ )
              break
            else
              depth += ( matched == "{" ? 1 : -1 )
              content << pre_match[from..-1]
              content << matched if depth > 0
            end
          end
          if do_highlight
            subtokenize "ruby", content
            start_group :expr, "}"
          else
            append content + "}"
          end
        when ?$, ?@
          # "#$" / "#@" not followed by a name: nothing more to append
          name = scan( /\w+/ )
          append name if name
        end
        end_region :expr if do_highlight
      else raise "unexpected match on #{matched}"
      end
    end
  end
end
# Scan the body of a heredoc that begins at the current position by handing
# it to scan_delimited_region.
#
# * +float+ - truthy when the terminator may be indented, i.e. does not
#   have to be flush left
# * +type+  - +nil+, a single quote, or a double quote; only a single quote
#   switches interpolation off
# * +delim+ - the terminator text to look for
def scan_heredoc(float, type, delim)
  interpolated = ( type != "'" )
  placement    = float ? :float : :flush
  scan_delimited_region( :constant, :string, "", interpolated,
                         delim, placement )
end
end
# Register this tokenizer class in SYNTAX under the key "ruby".
SYNTAX["ruby"] = Ruby
end

View file

@ -0,0 +1,108 @@
require 'syntax'
module Syntax
# A simple implementation of an XML lexer. It handles most cases. It is
# not a validating lexer, meaning it will happily process invalid XML without
# complaining.
class XML < Tokenizer
# Initialize the lexer. @in_tag records whether the scanner is currently
# inside a tag: #step sets it to true once it has read the start of a tag
# ("<" and the optional tag name) and back to false when it reaches the
# closing ">".
def setup
@in_tag = false
end
# Step through a single iteration of the tokenization process. This will
# yield (potentially) many tokens, and possibly zero tokens.
#
# Leading whitespace is always emitted as :normal first. Inside a tag
# (@in_tag) the next attribute name, number, "=", quoted string or closing
# ">" is emitted. Outside a tag, text up to the next "<" or "&" is emitted
# as :normal, followed by a comment, the start of a tag or an entity.
def step
  start_group :normal, matched if scan( /\s+/ )
  if @in_tag
    case
    when scan( /([-\w]+):([-\w]+)/ )
      start_group :namespace, subgroup(1)
      start_group :punct, ":"
      start_group :attribute, subgroup(2)
    when scan( /\d+/ )
      start_group :number, matched
    when scan( /[-\w]+/ )
      start_group :attribute, matched
    when scan( %r{[/?]?>} )
      @in_tag = false
      start_group :punct, matched
    when scan( /=/ )
      start_group :punct, matched
    when scan( /["']/ )
      scan_string matched
    else
      # getch returns nil when the whitespace scanned above was the last
      # thing in the input (an unterminated tag); never append that nil.
      char = getch
      append char if char
    end
  elsif ( text = scan_until( /(?=[<&])/ ) )
    start_group :normal, text unless text.empty?
    if scan(/<!--.*?(-->|\Z)/m)
      start_group :comment, matched
    else
      case peek(1)
      when "<"
        start_group :punct, getch
        # "<?", "</" and "<!" keep their second character in the same group
        case peek(1)
        when "?", "/", "!"
          append getch
        end
        start_group :normal, matched if scan( /\s+/ )
        if scan( /([-\w]+):([-\w]+)/ )
          start_group :namespace, subgroup(1)
          start_group :punct, ":"
          start_group :tag, subgroup(2)
        elsif scan( /[-\w]+/ )
          start_group :tag, matched
        end
        @in_tag = true
      when "&"
        if scan( /&\S{1,10};/ )
          start_group :entity, matched
        else
          # a bare ampersand that does not start an entity
          start_group :normal, scan( /&/ )
        end
      end
    end
  else
    # no further markup: the rest of the input is plain text
    append scan_until( /\Z/ )
  end
end
private
# Scan an attribute value whose opening quote (+delim+) has already been
# consumed by the caller. The quote is emitted as :punct, the contents as
# :string, entities as :entity; a backslash is emitted together with the
# character that follows it. Scanning ends after the closing quote, or as
# soon as no further "&", backslash or quote can be found.
def scan_string( delim )
  start_group :punct, delim
  stop_at = /(?=[&\\]|#{delim})/
  while ( text = scan_until( stop_at ) )
    start_group :string, text unless text.empty?
    upcoming = peek(1)
    if upcoming == "&"
      if scan( /&\S{1,10};/ )
        start_group :entity, matched
      else
        start_group :string, getch
      end
    elsif upcoming == "\\"
      start_group :string, getch
      append getch || ""
    elsif upcoming == delim
      start_group :punct, getch
      break
    end
  end
end
end
# Register this tokenizer class in SYNTAX under the key "xml".
SYNTAX["xml"] = XML
end

View file

@ -0,0 +1,105 @@
require 'syntax'
module Syntax
# A simple implementation of an YAML lexer. It handles most cases. It is
# not a validating lexer.
class YAML < Tokenizer
# Step through a single iteration of the tokenization process. This will
# yield (potentially) many tokens, and possibly zero tokens.
#
# At the beginning of a line the document marker, a mapping key, a sequence
# dash, a blank line or a comment is recognized. Elsewhere the rest of the
# line is split into whitespace, type tags, anchors, references, times,
# dates, quoted strings, symbols, punctuation, comments and ">" blocks;
# anything else up to the end of the line (or a "#") is emitted as :normal.
def step
  if bol?
    case
    when scan(/---(\s*.+)?$/)
      start_group :document, matched
    when scan(/(\s*)([a-zA-Z][-\w]*)(\s*):/)
      start_group :normal, subgroup(1)
      start_group :key, subgroup(2)
      start_group :normal, subgroup(3)
      start_group :punct, ":"
    when scan(/(\s*)-/)
      start_group :normal, subgroup(1)
      start_group :punct, "-"
    when scan(/\s*$/)
      blank = matched
      start_group :normal, blank
      # The pattern can match the empty string directly in front of a line
      # break. Nothing was consumed in that case, so take the line break
      # itself to make sure the scanner advances.
      append getch if blank.empty? && !eos?
    when scan(/#.*$/)
      start_group :comment, matched
    else
      append getch
    end
  else
    case
    when scan(/[\n\r]+/)
      start_group :normal, matched
    when scan(/[ \t]+/)
      start_group :normal, matched
    when scan(/!+(.*?^)?\S+/)
      start_group :type, matched
    when scan(/&\S+/)
      start_group :anchor, matched
    when scan(/\*\S+/)
      start_group :ref, matched
    when scan(/\d\d:\d\d:\d\d/)
      start_group :time, matched
    when scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/)
      start_group :date, matched
    when scan(/['"]/)
      start_group :punct, matched
      scan_string matched
    when scan(/:\w+/)
      start_group :symbol, matched
    when scan(/[:]/)
      start_group :punct, matched
    when scan(/#.*$/)
      start_group :comment, matched
    when scan(/>-?/)
      # ">" block: the rest of this line is :normal, the following lines
      # are collected as one :string for as long as they are blank or
      # indented at least as far as the first of them.
      start_group :punct, matched
      start_group :normal, scan(/.*$/)
      append getch until eos? || bol?
      return if eos?
      indent = check(/ */)
      start_group :string
      loop do
        # At the end of the input check_until still succeeds with an empty
        # match (because of \Z), so it has to be tested for explicitly or
        # this loop would never terminate.
        break if eos?
        line = check_until(/[\n\r]|\Z/)
        break if line.nil?
        if line.chomp.length > 0
          this_indent = line.chomp.match( /^\s*/ )[0]
          break if this_indent.length < indent.length
        end
        append scan_until(/[\n\r]|\Z/)
      end
    else
      start_group :normal, scan_until(/(?=$|#)/)
    end
  end
end
private
# Scan the remainder of a quoted scalar whose opening quote (+delim+) has
# already been consumed. Contents are emitted as :string; in strings that
# are not single-quoted a backslash and the character after it are emitted
# together as :expr; the closing quote is emitted as :punct. If no closing
# quote is found, the rest of the input is emitted as :string.
def scan_string( delim )
  boundary = /(?=[#{delim=="'" ? "" : "\\\\"}#{delim}])/
  loop do
    text = scan_until( boundary )
    unless text
      start_group :string, scan_until( /\Z/ )
      break
    end
    start_group :string, text unless text.empty?
    if peek(1) == "\\"
      # escape sequence: keep scanning after it
      start_group :expr, scan(/../)
    else
      # closing quote
      start_group :punct, getch
      break
    end
  end
end
end
# Register this tokenizer class in SYNTAX under the key "yaml".
SYNTAX["yaml"] = YAML
end

View file

@ -0,0 +1,9 @@
module Syntax
  # The version number, available both as its three numeric components and
  # as the dotted string built from them.
  module Version
    MAJOR = 1
    MINOR = 0
    TINY  = 0

    # "MAJOR.MINOR.TINY"
    STRING = [ MAJOR, MINOR, TINY ] * '.'
  end
end

View file

@ -0,0 +1,179 @@
#!/usr/bin/env ruby
# Author: Aredridel <aredridel@nbtsc.org>
# Website: http://theinternetco.net/projects/ruby/xhtmldiff.html
# Licence: same as Ruby
# Version: 1.2.2
#
# Tweaks by Jacques Distler <distler@golem.ph.utexas.edu>
# -- add classnames to <del> and <ins> elements added by XHTMLDiff,
# for better CSS styling
require 'diff/lcs'
require 'rexml/document'
require 'delegate'
# Returns the larger of +a+ and +b+ as decided by +a > b+; when +a+ is not
# greater (including when both are equal) +b+ is returned.
def Math.max(a, b)
  return a if a > b
  b
end
module REXML
  class Text
    # Gives Text the deep_clone method that XHTMLDiff calls on every node
    # it copies; for a text node it is simply #clone.
    def deep_clone
      clone
    end
  end

  # Wraps an Element so that it can be compared and hashed by its
  # serialized form. Two wrappers are equal when the to_s output of the
  # wrapped objects is the same apart from leading/trailing whitespace, and
  # equal wrappers have equal hash values, so they can be looked up in a
  # Hash.
  class HashableElementDelegator < DelegateClass(Element)
    def initialize(sub)
      super sub
    end

    # True when both objects serialize to the same text, ignoring
    # surrounding whitespace.
    def ==(other)
      other.to_s.strip == to_s.strip
    end

    def eql?(other)
      self == other
    end

    # Delegates to the wrapped element; a result of the wrapped element's
    # own class is wrapped again so that it stays comparable by content.
    def [](k)
      r = super
      if r.kind_of? __getobj__.class
        self.class.new(r)
      else
        r
      end
    end

    # Hash of the same stripped text that == compares. (Hashing the
    # unstripped text would let two equal wrappers land in different hash
    # buckets.)
    def hash
      __getobj__.to_s.strip.hash
    end
  end
end
# Receives the callbacks of Diff::LCS.traverse_balanced (match, discard_a,
# discard_b, change) and builds a combined document in +output+: unchanged
# nodes are copied, removed nodes are wrapped in <del> and added nodes in
# <ins>, each with a class attribute (diffdel, diffins or diffmod) for CSS
# styling.
class XHTMLDiff
  include REXML
  attr_accessor :output

  class << self
    BLOCK_CONTAINERS = ['div', 'ul', 'li']

    # Returns a node describing the difference between +a+ and +b+.
    #
    # * equal arguments: a deep clone of +a+
    # * two HashableElementDelegators: a new element with the name and
    #   attributes of +a+, filled by diffing the two
    # * two Text nodes: a <span> filled by diffing the texts word by word
    #
    # Raises ArgumentError for any other combination.
    def diff(a, b)
      if a == b
        return a.deep_clone
      end
      if REXML::HashableElementDelegator === a and REXML::HashableElementDelegator === b
        o = REXML::Element.new(a.name)
        o.add_attributes a.attributes
        hd = self.new(o)
        Diff::LCS.traverse_balanced(a, b, hd)
        o
      elsif REXML::Text === a and REXML::Text === b
        o = REXML::Element.new('span')
        aa = a.value.split(/\s/)
        ba = b.value.split(/\s/)
        hd = XHTMLTextDiff.new(o)
        Diff::LCS.traverse_balanced(aa, ba, hd)
        o
      else
        raise ArgumentError, "both arguments must be equal or both be elements. a is #{a.class.name} and b is #{b.class.name}"
      end
    end
  end

  def diff(a, b)
    self.class.diff(a,b)
  end

  # +output+ is the element that receives the result nodes.
  def initialize(output)
    @output = output
  end

  # This will be called with both elements are the same
  def match(event)
    @output << event.old_element.deep_clone if event.old_element
  end

  # This will be called when there is an element in A that isn't in B
  def discard_a(event)
    @output << wrap(event.old_element, 'del', 'diffdel')
  end

  # This will be called when an element of A was replaced by a different
  # element in B. The two are diffed recursively; the detailed result is
  # used only if more than half of it (relative to the larger of the two
  # originals) lies outside <ins>/<del> markup. Otherwise, or if the two
  # cannot be diffed, the old element is emitted as deleted and the new one
  # as inserted.
  def change(event)
    begin
      sd = diff(event.old_element, event.new_element)
    rescue ArgumentError
      sd = nil
    end

    keep_subdiff = false
    if sd
      # The <ins>/<del> elements written by #wrap carry a class attribute,
      # so the opening tag has to be matched with its attributes; a bare
      # "<ins>"/"<del>" pattern never matches them.
      unchanged = sd.to_s.gsub(%r{<(ins|del)\b[^>]*>.*</\1>}, '').size
      largest = Math.max(event.old_element.to_s.size, event.new_element.to_s.size)
      keep_subdiff = Float(unchanged) / largest > 0.5
    end

    if keep_subdiff
      @output << sd
    else
      @output << wrap(event.old_element, 'del', 'diffmod')
      @output << wrap(event.new_element, 'ins', 'diffmod')
    end
  end

  # This will be called when there is an element in B that isn't in A
  def discard_b(event)
    @output << wrap(event.new_element, 'ins', 'diffins')
  end

  def choose_event(event, element, tag)
  end

  # Returns a deep clone of +element+, placed inside a new +tag+ element
  # when +tag+ is given. +class_name+, when given, becomes the class
  # attribute of the returned element.
  def wrap(element, tag = nil, class_name = nil)
    if tag
      el = Element.new tag
      el << element.deep_clone
    else
      el = element.deep_clone
    end
    if class_name
      el.add_attribute('class', class_name)
    end
    el
  end

  # Callback receiver for word-by-word diffs of text: the items it is
  # handed are plain strings, which #wrap turns into text nodes.
  class XHTMLTextDiff < XHTMLDiff
    def change(event)
      @output << wrap(event.old_element, 'del', 'diffmod')
      @output << wrap(event.new_element, 'ins', 'diffmod')
    end

    # This will be called with both elements are the same
    def match(event)
      @output << wrap(event.old_element, nil, nil) if event.old_element
    end

    # This will be called when there is an element in A that isn't in B
    def discard_a(event)
      @output << wrap(event.old_element, 'del', 'diffdel')
    end

    # This will be called when there is an element in B that isn't in A
    def discard_b(event)
      @output << wrap(event.new_element, 'ins', 'diffins')
    end

    # Turns a String into a text node with a leading space (the words were
    # produced by splitting on whitespace) and, when +tag+ is given, places
    # it inside a new +tag+ element carrying +class_name+ as class
    # attribute.
    def wrap(element, tag = nil, class_name = nil)
      element = REXML::Text.new(" " + element) if String === element
      return element unless tag
      wrapper_element = REXML::Element.new(tag)
      wrapper_element.add_text element
      if class_name
        wrapper_element.add_attribute('class', class_name)
      end
      wrapper_element
    end
  end
end
# When this file is executed directly (the script name equals this file's
# name) rather than loaded by another program, report on standard error
# that it has no test suite and exit with status 1.
if $0 == __FILE__
$stderr.puts "No tests available yet"
exit(1)
end