instiki/lib/redcloth_for_tex.rb

737 lines
21 KiB
Ruby
Raw Normal View History

2007-01-22 14:43:50 +01:00
# This is RedCloth (http://www.whytheluckystiff.net/ruby/redcloth/)
# converted by David Heinemeier Hansson to emit Tex
class String
# Flexible HTML escaping
def texesc!( mode )
gsub!( '&', '\\\\&' )
gsub!( '%', '\%' )
gsub!( '$', '\$' )
gsub!( '~', '$\sim$' )
end
end
def table_of_contents(text, pages)
text.gsub( /^([#*]+? .*?)$(?![^#*])/m ) do |match|
lines = match.split( /\n/ )
last_line = -1
depth = []
lines.each_with_index do |line, line_id|
if line =~ /^([#*]+) (.*)$/m
tl,content = $~[1..2]
content.gsub! /[\[\]]/, ""
content.strip!
if depth.last
if depth.last.length > tl.length
(depth.length - 1).downto(0) do |i|
break if depth[i].length == tl.length
lines[line_id - 1] << "" # "\n\t\\end{#{ lT( depth[i] ) }}\n\t"
depth.pop
end
end
if !depth.last.nil? && !tl.length.nil? && depth.last.length == tl.length
lines[line_id - 1] << ''
end
end
depth << tl unless depth.last == tl
subsection_depth = [depth.length - 1, 2].min
lines[line_id] = "\n\\#{ "sub" * subsection_depth }section{#{ content }}"
lines[line_id] += "\n#{pages[content]}" if pages.keys.include?(content)
lines[line_id] = "\\pagebreak\n#{lines[line_id]}" if subsection_depth == 0
last_line = line_id
elsif line =~ /^\s+\S/
last_line = line_id
elsif line_id - last_line < 2 and line =~ /^\S/
last_line = line_id
end
if line_id - last_line > 1 or line_id == lines.length - 1
depth.delete_if do |v|
lines[last_line] << "" # "\n\t\\end{#{ lT( v ) }}"
end
end
end
lines.join( "\n" )
end
end
class RedClothForTex < String
VERSION = '2.0.7'
#
# Mapping of 8-bit ASCII codes to HTML numerical entity equivalents.
# (from PyTextile)
#
TEXTILE_TAGS =
[[128, 8364], [129, 0], [130, 8218], [131, 402], [132, 8222], [133, 8230],
[134, 8224], [135, 8225], [136, 710], [137, 8240], [138, 352], [139, 8249],
[140, 338], [141, 0], [142, 0], [143, 0], [144, 0], [145, 8216], [146, 8217],
[147, 8220], [148, 8221], [149, 8226], [150, 8211], [151, 8212], [152, 732],
[153, 8482], [154, 353], [155, 8250], [156, 339], [157, 0], [158, 0], [159, 376]].
collect! do |a, b|
[a.chr, ( b.zero? and "" or "&#{ b };" )]
end
#
# Regular expressions to convert to HTML.
#
A_HLGN = /(?:(?:<>|<|>|\=|[()]+)+)/
A_VLGN = /[\-^~]/
C_CLAS = '(?:\([^)]+\))'
C_LNGE = '(?:\[[^\]]+\])'
C_STYL = '(?:\{[^}]+\})'
S_CSPN = '(?:\\\\\d+)'
S_RSPN = '(?:/\d+)'
A = "(?:#{A_HLGN}?#{A_VLGN}?|#{A_VLGN}?#{A_HLGN}?)"
S = "(?:#{S_CSPN}?#{S_RSPN}|#{S_RSPN}?#{S_CSPN}?)"
C = "(?:#{C_CLAS}?#{C_STYL}?#{C_LNGE}?|#{C_STYL}?#{C_LNGE}?#{C_CLAS}?|#{C_LNGE}?#{C_STYL}?#{C_CLAS}?)"
# PUNCT = Regexp::quote( '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~' )
PUNCT = Regexp::quote( '!"#$%&\'*+,-./:;=?@\\^_`|~' )
HYPERLINK = '(\S+?)([^\w\s/;=\?]*?)(\s|$)'
GLYPHS = [
# [ /([^\s\[{(>])?\'([dmst]\b|ll\b|ve\b|\s|:|$)/, '\1&#8217;\2' ], # single closing
[ /([^\s\[{(>])\'/, '\1&#8217;' ], # single closing
[ /\'(?=\s|s\b|[#{PUNCT}])/, '&#8217;' ], # single closing
[ /\'/, '&#8216;' ], # single opening
# [ /([^\s\[{(])?"(\s|:|$)/, '\1&#8221;\2' ], # double closing
[ /([^\s\[{(>])"/, '\1&#8221;' ], # double closing
[ /"(?=\s|[#{PUNCT}])/, '&#8221;' ], # double closing
[ /"/, '&#8220;' ], # double opening
[ /\b( )?\.{3}/, '\1&#8230;' ], # ellipsis
[ /\b([A-Z][A-Z0-9]{2,})\b(?:[(]([^)]*)[)])/, '<acronym title="\2">\1</acronym>' ], # 3+ uppercase acronym
[ /(^|[^"][>\s])([A-Z][A-Z0-9 ]{2,})([^<a-z0-9]|$)/, '\1<span class="caps">\2</span>\3' ], # 3+ uppercase caps
[ /(\.\s)?\s?--\s?/, '\1&#8212;' ], # em dash
[ /\s->\s/, ' &rarr; ' ], # en dash
[ /\s-\s/, ' &#8211; ' ], # en dash
[ /(\d+) ?x ?(\d+)/, '\1&#215;\2' ], # dimension sign
[ /\b ?[(\[]TM[\])]/i, '&#8482;' ], # trademark
[ /\b ?[(\[]R[\])]/i, '&#174;' ], # registered
[ /\b ?[(\[]C[\])]/i, '&#169;' ] # copyright
]
I_ALGN_VALS = {
'<' => 'left',
'=' => 'center',
'>' => 'right'
}
H_ALGN_VALS = {
'<' => 'left',
'=' => 'center',
'>' => 'right',
'<>' => 'justify'
}
V_ALGN_VALS = {
'^' => 'top',
'-' => 'middle',
'~' => 'bottom'
}
QTAGS = [
['**', 'bf'],
['*', 'bf'],
['??', 'cite'],
['-', 'del'],
['__', 'underline'],
['_', 'em'],
['%', 'span'],
['+', 'ins'],
['^', 'sup'],
['~', 'sub']
]
def self.available?
if not defined? @@available
begin
@@available = system "pdflatex -version"
rescue Errno::ENOENT
@@available = false
end
end
@@available
end
#
# Two accessor for setting security restrictions.
#
# This is a nice thing if you're using RedCloth for
# formatting in public places (e.g. Wikis) where you
# don't want users to abuse HTML for bad things.
#
# If +:filter_html+ is set, HTML which wasn't
# created by the Textile processor will be escaped.
#
# If +:filter_styles+ is set, it will also disable
# the style markup specifier. ('{color: red}')
#
attr_accessor :filter_html, :filter_styles
#
# Accessor for toggling line folding.
#
# If +:fold_lines+ is set, single newlines will
# not be converted to break tags.
#
attr_accessor :fold_lines
def initialize( string, restrictions = [] )
restrictions.each { |r| method( "#{ r }=" ).call( true ) }
super( string )
end
#
# Generate tex.
#
def to_tex( lite = false )
# make our working copy
text = self.dup
@urlrefs = {}
@shelf = []
# incoming_entities text
fix_entities text
clean_white_space text
get_refs text
no_textile text
unless lite
lists text
table text
end
glyphs text
unless lite
fold text
block text
end
retrieve text
encode_entities text
text.gsub!(/\[\[(.*?)\]\]/, "\\1")
text.gsub!(/_/, "\\_")
text.gsub!( /<\/?notextile>/, '' )
# text.gsub!( /x%x%/, '&#38;' )
# text.gsub!( /<br \/>/, "<br />\n" )
text.strip!
text
end
def pgl( text )
GLYPHS.each do |re, resub|
text.gsub! re, resub
end
end
def pba( text_in, element = "" )
return '' unless text_in
style = []
text = text_in.dup
if element == 'td'
colspan = $1 if text =~ /\\(\d+)/
rowspan = $1 if text =~ /\/(\d+)/
style << "vertical-align:#{ v_align( $& ) };" if text =~ A_VLGN
end
style << "#{ $1 };" if not @filter_styles and
text.sub!( /\{([^}]*)\}/, '' )
lang = $1 if
text.sub!( /\[([^)]+?)\]/, '' )
cls = $1 if
text.sub!( /\(([^()]+?)\)/, '' )
style << "padding-left:#{ $1.length }em;" if
text.sub!( /([(]+)/, '' )
style << "padding-right:#{ $1.length }em;" if text.sub!( /([)]+)/, '' )
style << "text-align:#{ h_align( $& ) };" if text =~ A_HLGN
cls, id = $1, $2 if cls =~ /^(.*?)#(.*)$/
atts = ''
atts << " style=\"#{ style.join }\"" unless style.empty?
atts << " class=\"#{ cls }\"" unless cls.to_s.empty?
atts << " lang=\"#{ lang }\"" if lang
atts << " id=\"#{ id }\"" if id
atts << " colspan=\"#{ colspan }\"" if colspan
atts << " rowspan=\"#{ rowspan }\"" if rowspan
atts
end
def table( text )
text << "\n\n"
text.gsub!( /^(?:table(_?#{S}#{A}#{C})\. ?\n)?^(#{A}#{C}\.? ?\|.*?\|)\n\n/m ) do |matches|
tatts, fullrow = $~[1..2]
tatts = pba( tatts, 'table' )
rows = []
fullrow.
split( /\|$/m ).
delete_if { |x| x.empty? }.
each do |row|
ratts, row = pba( $1, 'tr' ), $2 if row =~ /^(#{A}#{C}\. )(.*)/m
cells = []
row.split( '|' ).each do |cell|
ctyp = 'd'
ctyp = 'h' if cell =~ /^_/
catts = ''
catts, cell = pba( $1, 'td' ), $2 if cell =~ /^(_?#{S}#{A}#{C}\. )(.*)/
unless cell.strip.empty?
cells << "\t\t\t<t#{ ctyp }#{ catts }>#{ cell }</t#{ ctyp }>"
end
end
rows << "\t\t<tr#{ ratts }>\n#{ cells.join( "\n" ) }\n\t\t</tr>"
end
"\t<table#{ tatts }>\n#{ rows.join( "\n" ) }\n\t</table>\n\n"
end
end
def lists( text )
text.gsub!( /^([#*]+?#{C} .*?)$(?![^#*])/m ) do |match|
lines = match.split( /\n/ )
last_line = -1
depth = []
lines.each_with_index do |line, line_id|
if line =~ /^([#*]+)(#{A}#{C}) (.*)$/m
tl,atts,content = $~[1..3]
if depth.last
if depth.last.length > tl.length
(depth.length - 1).downto(0) do |i|
break if depth[i].length == tl.length
lines[line_id - 1] << "\n\t\\end{#{ lT( depth[i] ) }}\n\t"
depth.pop
end
end
if !depth.last.nil? && !tl.length.nil? && depth.last.length == tl.length
lines[line_id - 1] << ''
end
end
unless depth.last == tl
depth << tl
atts = pba( atts )
lines[line_id] = "\t\\begin{#{ lT(tl) }}\n\t\\item #{ content }"
else
lines[line_id] = "\t\t\\item #{ content }"
end
last_line = line_id
elsif line =~ /^\s+\S/
last_line = line_id
elsif line_id - last_line < 2 and line =~ /^\S/
last_line = line_id
end
if line_id - last_line > 1 or line_id == lines.length - 1
depth.delete_if do |v|
lines[last_line] << "\n\t\\end{#{ lT( v ) }}"
end
end
end
lines.join( "\n" )
end
end
def lT( text )
text =~ /\#$/ ? 'enumerate' : 'itemize'
end
def fold( text )
text.gsub!( /(.+)\n(?![#*\s|])/, "\\1\\\\\\\\" )
# text.gsub!( /(.+)\n(?![#*\s|])/, "\\1#{ @fold_lines ? ' ' : '<br />' }" )
end
def block( text )
pre = false
find = ['bq','h[1-6]','fn\d+']
regexp_cue = []
lines = text.split( /\n/ ) + [' ']
new_text =
lines.collect do |line|
pre = true if line =~ /<(pre|notextile)>/i
find.each do |tag|
line.gsub!( /^(#{ tag })(#{A}#{C})\.(?::(\S+))? (.*)$/ ) do |m|
tag,atts,cite,content = $~[1..4]
atts = pba( atts )
if tag =~ /fn(\d+)/
# tag = 'p';
# atts << " id=\"fn#{ $1 }\""
regexp_cue << [ /footnote\{#{$1}}/, "footnote{#{content}}" ]
content = ""
end
if tag =~ /h([1-6])/
section_type = "sub" * [$1.to_i - 1, 2].min
start = "\t\\#{section_type}section*{"
tend = "}"
end
if tag == "bq"
cite = check_refs( cite )
cite = " cite=\"#{ cite }\"" if cite
start = "\t\\begin{quotation}\n\\noindent {\\em ";
tend = "}\n\t\\end{quotation}";
end
"#{ start }#{ content }#{ tend }"
end unless pre
end
#line.gsub!( /^(?!\t|<\/?pre|<\/?notextile|<\/?code|$| )(.*)/, "\t<p>\\1</p>" )
#line.gsub!( "<br />", "\n" ) if pre
# pre = false if line =~ /<\/(pre|notextile)>/i
line
end.join( "\n" )
text.replace( new_text )
regexp_cue.each { |pair| text.gsub!(pair.first, pair.last) }
end
def span( text )
QTAGS.each do |tt, ht|
ttr = Regexp::quote( tt )
text.gsub!(
/(^|\s|\>|[#{PUNCT}{(\[])
#{ttr}
(#{C})
(?::(\S+?))?
([^\s#{ttr}]+?(?:[^\n]|\n(?!\n))*?)
([#{PUNCT}]*?)
#{ttr}
(?=[\])}]|[#{PUNCT}]+?|<|\s|$)/xm
) do |m|
start,atts,cite,content,tend = $~[1..5]
atts = pba( atts )
atts << " cite=\"#{ cite }\"" if cite
"#{ start }{\\#{ ht } #{ content }#{ tend }}"
end
end
end
def links( text )
text.gsub!( /
([\s\[{(]|[#{PUNCT}])? # $pre
" # start
(#{C}) # $atts
([^"]+?) # $text
\s?
(?:\(([^)]+?)\)(?="))? # $title
":
(\S+?) # $url
(\/)? # $slash
([^\w\/;]*?) # $post
(?=\s|$)
/x ) do |m|
pre,atts,text,title,url,slash,post = $~[1..7]
url.gsub!(/(\\)(.)/, '\2')
url = check_refs( url )
atts = pba( atts )
atts << " title=\"#{ title }\"" if title
atts = shelve( atts ) if atts
"#{ pre }\\textit{#{ text }} \\footnote{\\texttt{\\textless #{ url }#{ slash }" +
"\\textgreater}#{ post }}"
end
end
def get_refs( text )
text.gsub!( /(^|\s)\[(.+?)\]((?:http:\/\/|javascript:|ftp:\/\/|\/)\S+?)(?=\s|$)/ ) do |m|
flag, url = $~[1..2]
@urlrefs[flag] = url
end
end
def check_refs( text )
@urlrefs[text] || text
end
def image( text )
text.gsub!( /
\! # opening
(\<|\=|\>)? # optional alignment atts
(#{C}) # optional style,class atts
(?:\. )? # optional dot-space
([^\s(!]+?) # presume this is the src
\s? # optional space
(?:\(((?:[^\(\)]|\([^\)]+\))+?)\))? # optional title
\! # closing
(?::#{ HYPERLINK })? # optional href
/x ) do |m|
algn,atts,url,title,href,href_a1,href_a2 = $~[1..7]
atts = pba( atts )
atts << " align=\"#{ i_align( algn ) }\"" if algn
atts << " title=\"#{ title }\"" if title
atts << " alt=\"#{ title }\""
# size = @getimagesize($url);
# if($size) $atts.= " $size[3]";
href = check_refs( href ) if href
url = check_refs( url )
out = ''
out << "<a href=\"#{ href }\">" if href
out << "<img src=\"#{ url }\"#{ atts } />"
out << "</a>#{ href_a1 }#{ href_a2 }" if href
out
end
end
def code( text )
text.gsub!( /
(?:^|([\s\(\[{])) # 1 open bracket?
@ # opening
(?:\|(\w+?)\|)? # 2 language
(\S(?:[^\n]|\n(?!\n))*?) # 3 code
@ # closing
(?:$|([\]})])|
(?=[#{PUNCT}]{1,2}|
\s)) # 4 closing bracket?
/x ) do |m|
before,lang,code,after = $~[1..4]
lang = " language=\"#{ lang }\"" if lang
"#{ before }<code#{ lang }>#{ code }</code>#{ after }"
end
end
def shelve( val )
@shelf << val
" <#{ @shelf.length }>"
end
def retrieve( text )
@shelf.each_with_index do |r, i|
text.gsub!( " <#{ i + 1 }>", r )
end
end
def incoming_entities( text )
## turn any incoming ampersands into a dummy character for now.
## This uses a negative lookahead for alphanumerics followed by a semicolon,
## implying an incoming html entity, to be skipped
text.gsub!( /&(?![#a-z0-9]+;)/i, "x%x%" )
end
def encode_entities( text )
## Convert high and low ascii to entities.
# if $-K == "UTF-8"
# encode_high( text )
# else
text.texesc!( :NoQuotes )
# end
end
def fix_entities( text )
## de-entify any remaining angle brackets or ampersands
text.gsub!( "\&", "&" )
text.gsub!( "\%", "%" )
end
def clean_white_space( text )
text.gsub!( /\r\n/, "\n" )
text.gsub!( /\t/, '' )
text.gsub!( /\n{3,}/, "\n\n" )
text.gsub!( /\n *\n/, "\n\n" )
text.gsub!( /"$/, "\" " )
end
def no_textile( text )
text.gsub!( /(^|\s)==(.*?)==(\s|$)?/,
'\1<notextile>\2</notextile>\3' )
end
def footnote_ref( text )
text.gsub!( /\[([0-9]+?)\](\s)?/,
'\footnote{\1}\2')
#'<sup><a href="#fn\1">\1</a></sup>\2' )
end
def inline( text )
image text
links text
code text
span text
end
def glyphs_deep( text )
codepre = 0
offtags = /(?:code|pre|kbd|notextile)/
if text !~ /<.*>/
# pgl text
footnote_ref text
else
used_offtags = {}
text.gsub!( /(?:[^<].*?(?=<[^\n]*?>|$)|<[^\n]*?>+)/m ) do |line|
tagline = ( line =~ /^<.*>/ )
## matches are off if we're between <code>, <pre> etc.
if tagline
if line =~ /<(#{ offtags })>/i
codepre += 1
used_offtags[$1] = true
line.texesc!( :NoQuotes ) if codepre - used_offtags.length > 0
elsif line =~ /<\/(#{ offtags })>/i
line.texesc!( :NoQuotes ) if codepre - used_offtags.length > 0
codepre -= 1 unless codepre.zero?
used_offtags = {} if codepre.zero?
elsif @filter_html or codepre > 0
line.texesc!( :NoQuotes )
## line.gsub!( /&lt;(\/?#{ offtags })&gt;/, '<\1>' )
end
## do htmlspecial if between <code>
elsif codepre > 0
line.texesc!( :NoQuotes )
## line.gsub!( /&lt;(\/?#{ offtags })&gt;/, '<\1>' )
elsif not tagline
inline line
glyphs_deep line
end
line
end
end
end
def glyphs( text )
text.gsub!( /"\z/, "\" " )
## if no html, do a simple search and replace...
if text !~ /<.*>/
inline text
end
glyphs_deep text
end
def i_align( text )
I_ALGN_VALS[text]
end
def h_align( text )
H_ALGN_VALS[text]
end
def v_align( text )
V_ALGN_VALS[text]
end
def encode_high( text )
## mb_encode_numericentity($text, $cmap, $charset);
end
def decode_high( text )
## mb_decode_numericentity($text, $cmap, $charset);
end
def textile_popup_help( name, helpvar, windowW, windowH )
' <a target="_blank" href="http://www.textpattern.com/help/?item=' + helpvar + '" onclick="window.open(this.href, \'popupwindow\', \'width=' + windowW + ',height=' + windowH + ',scrollbars,resizable\'); return false;">' + name + '</a><br />'
end
CMAP = [
160, 255, 0, 0xffff,
402, 402, 0, 0xffff,
913, 929, 0, 0xffff,
931, 937, 0, 0xffff,
945, 969, 0, 0xffff,
977, 978, 0, 0xffff,
982, 982, 0, 0xffff,
8226, 8226, 0, 0xffff,
8230, 8230, 0, 0xffff,
8242, 8243, 0, 0xffff,
8254, 8254, 0, 0xffff,
8260, 8260, 0, 0xffff,
8465, 8465, 0, 0xffff,
8472, 8472, 0, 0xffff,
8476, 8476, 0, 0xffff,
8482, 8482, 0, 0xffff,
8501, 8501, 0, 0xffff,
8592, 8596, 0, 0xffff,
8629, 8629, 0, 0xffff,
8656, 8660, 0, 0xffff,
8704, 8704, 0, 0xffff,
8706, 8707, 0, 0xffff,
8709, 8709, 0, 0xffff,
8711, 8713, 0, 0xffff,
8715, 8715, 0, 0xffff,
8719, 8719, 0, 0xffff,
8721, 8722, 0, 0xffff,
8727, 8727, 0, 0xffff,
8730, 8730, 0, 0xffff,
8733, 8734, 0, 0xffff,
8736, 8736, 0, 0xffff,
8743, 8747, 0, 0xffff,
8756, 8756, 0, 0xffff,
8764, 8764, 0, 0xffff,
8773, 8773, 0, 0xffff,
8776, 8776, 0, 0xffff,
8800, 8801, 0, 0xffff,
8804, 8805, 0, 0xffff,
8834, 8836, 0, 0xffff,
8838, 8839, 0, 0xffff,
8853, 8853, 0, 0xffff,
8855, 8855, 0, 0xffff,
8869, 8869, 0, 0xffff,
8901, 8901, 0, 0xffff,
8968, 8971, 0, 0xffff,
9001, 9002, 0, 0xffff,
9674, 9674, 0, 0xffff,
9824, 9824, 0, 0xffff,
9827, 9827, 0, 0xffff,
9829, 9830, 0, 0xffff,
338, 339, 0, 0xffff,
352, 353, 0, 0xffff,
376, 376, 0, 0xffff,
710, 710, 0, 0xffff,
732, 732, 0, 0xffff,
8194, 8195, 0, 0xffff,
8201, 8201, 0, 0xffff,
8204, 8207, 0, 0xffff,
8211, 8212, 0, 0xffff,
8216, 8218, 0, 0xffff,
8218, 8218, 0, 0xffff,
8220, 8222, 0, 0xffff,
8224, 8225, 0, 0xffff,
8240, 8240, 0, 0xffff,
8249, 8250, 0, 0xffff,
8364, 8364, 0, 0xffff
]
end