From 1c05a94d1b0e9006b6dfc723e7c8adef1b5e9d9f Mon Sep 17 00:00:00 2001 From: Jacques Distler Date: Tue, 23 Jan 2007 09:26:45 -0600 Subject: [PATCH] Updated to latest Maruku. --- config/spam_patterns.txt | 1 - lib/maruku/defaults.rb | 11 +- lib/maruku/input/html_helper.rb | 151 ++++++++++++++++---------- lib/maruku/input/parse_span_better.rb | 8 +- lib/maruku/input/type_detection.rb | 6 +- lib/maruku/output/to_html.rb | 146 ++++++++++++++++++++++--- lib/maruku/output/to_latex.rb | 47 +++++--- lib/maruku/tests/new_parser.rb | 6 +- lib/maruku/version.rb | 2 +- 9 files changed, 280 insertions(+), 98 deletions(-) diff --git a/config/spam_patterns.txt b/config/spam_patterns.txt index 1a791406..a1c0a590 100644 --- a/config/spam_patterns.txt +++ b/config/spam_patterns.txt @@ -49,7 +49,6 @@ lust cartoon mijneigenweblog Mortage myspace -naked netfirms\.com nice site overflow:\s*auto diff --git a/lib/maruku/defaults.rb b/lib/maruku/defaults.rb index 93dff0c1..e8912e89 100644 --- a/lib/maruku/defaults.rb +++ b/lib/maruku/defaults.rb @@ -23,8 +23,10 @@ module MaRuKu Globals = { :unsafe_features => false, + :on_error => :warning, - :debug_keep_ials => false, + + :use_numbered_headers => false, :maruku_signature => false, :code_background_color => '#fef', @@ -37,7 +39,12 @@ Globals = { :html_png_resolution => 200, :html_use_syntax => false, - :on_error => :warning + + :latex_use_listings => false, + :latex_cjk => false, + + :debug_keep_ials => false, + } class MDElement diff --git a/lib/maruku/input/html_helper.rb b/lib/maruku/input/html_helper.rb index 4275d90f..84932c60 100644 --- a/lib/maruku/input/html_helper.rb +++ b/lib/maruku/input/html_helper.rb @@ -29,12 +29,13 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser include MaRuKu::Strings Tag = %r{^<(/)?(\w+)\s*([^>]*)>}m + PartialTag = %r{^<.*}m + EverythingElse = %r{^[^<]+}m CommentStart = %r{^} TO_SANITIZE = ['img','hr'] -# attr_accessor :inside_comment attr_reader :rest def initialize @@ -42,72 +43,61 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser @tag_stack = [] @m = nil @already = "" - @inside_comment = false + self.state = :inside_element end + + attr_accessor :state # :inside_element, :inside_tag, :inside_comment, def eat_this(line) @rest = line + @rest things_read = 0 until @rest.empty? - if @inside_comment - if @m = CommentEnd.match(@rest) - @inside_comment = false - @already += @m.pre_match + @m.to_s - @rest = @m.post_match - elsif @m = EverythingElse.match(@rest) - @already += @m.pre_match + @m.to_s - @rest = @m.post_match - end - else - if @m = CommentStart.match(@rest) - things_read += 1 - @inside_comment = true - @already += @m.pre_match + @m.to_s - @rest = @m.post_match - elsif @m = Tag.match(@rest) - things_read += 1 - @already += @m.pre_match - @rest = @m.post_match - - is_closing = !!@m[1] - tag = @m[2] - attributes = @m[3] - - is_single = false - if attributes =~ /\A(.*)\/\Z/ - attributes = $1 - is_single = true + case self.state + when :inside_comment + if @m = CommentEnd.match(@rest) + @already += @m.pre_match + @m.to_s + @rest = @m.post_match + self.state = :inside_element + else + @already += @rest + @rest = "" + self.state = :inside_comment end - - if TO_SANITIZE.include? tag - attributes.strip! - # puts "Attributes: #{attributes.inspect}" - if attributes.size > 0 - @already += '<%s %s />' % [tag, attributes] - else - @already += '<%s />' % [tag] - end - elsif is_closing - @already += @m.to_s - if @tag_stack.empty? - error "Malformed: closing tag #{tag.inspect} "+ - "in empty list" - end - if @tag_stack.last != tag - error "Malformed: tag <#{tag}> "+ - "closes <#{@tag_stack.last}>" - end - @tag_stack.pop - elsif not is_single - @tag_stack.push tag - @already += @m.to_s + when :inside_element + if @m = CommentStart.match(@rest) + things_read += 1 + @already += @m.pre_match + @m.to_s + @rest = @m.post_match + self.state = :inside_comment + elsif @m = Tag.match(@rest) then + things_read += 1 + handle_tag + self.state = :inside_element + elsif @m = PartialTag.match(@rest) then + @already += @m.pre_match + @rest = @m.post_match + @partial_tag = @m.to_s + self.state = :inside_tag + elsif @m = EverythingElse.match(@rest) + @already += @m.pre_match + @m.to_s + @rest = @m.post_match + self.state = :inside_element + else + error "Malformed HTML: not complete: #{@rest.inspect}" + end + when :inside_tag + if @m = /^[^>]*>/.match(@rest) then + @partial_tag += @m.to_s + @rest = @partial_tag + @m.post_match + @partial_tag = nil + self.state = :inside_element + else + @partial_tag += @rest + @rest = "" + self.state = :inside_tag end - elsif @m = EverythingElse.match(@rest) - @already += @m.pre_match + @m.to_s - @rest = @m.post_match else - error "Malformed HTML: not complete: #{@rest.inspect}" - end + raise "Bug bug: state = #{self.state.inspect}" end # not inside comment # puts inspect @@ -116,12 +106,53 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser end end + def handle_tag() + @already += @m.pre_match + @rest = @m.post_match + is_closing = !!@m[1] + tag = @m[2] + attributes = @m[3] + + + is_single = false + if attributes =~ /\A(.*)\/\Z/ + attributes = $1 + is_single = true + end + +# puts "READ TAG #{@m.to_s.inspect} tag = #{tag} closing? #{is_closing} single = #{is_single}" + + if TO_SANITIZE.include? tag + attributes.strip! + # puts "Attributes: #{attributes.inspect}" + if attributes.size > 0 + @already += '<%s %s />' % [tag, attributes] + else + @already += '<%s />' % [tag] + end + elsif is_closing + @already += @m.to_s + if @tag_stack.empty? + error "Malformed: closing tag #{tag.inspect} "+ + "in empty list" + end + if @tag_stack.last != tag + error "Malformed: tag <#{tag}> "+ + "closes <#{@tag_stack.last}>" + end + @tag_stack.pop + else + @already += @m.to_s + + @tag_stack.push(tag) unless is_single + end + end def error(s) raise Exception, "Error: #{s} \n"+ inspect, caller end - def inspect; "HTML READER\n comment=#{@inside_comment} "+ + def inspect; "HTML READER\n state=#{self.state} "+ "match=#{@m.to_s.inspect}\n"+ "Tag stack = #{@tag_stack.inspect} \n"+ "Before:\n"+ @@ -137,7 +168,7 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser end def is_finished? - not @inside_comment and @tag_stack.empty? + (self.state == :inside_element) and @tag_stack.empty? end end # html helper diff --git a/lib/maruku/input/parse_span_better.rb b/lib/maruku/input/parse_span_better.rb index 211d7683..14fae039 100644 --- a/lib/maruku/input/parse_span_better.rb +++ b/lib/maruku/input/parse_span_better.rb @@ -557,6 +557,11 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser when ?[ # link ref ref_id = read_ref_id(src,con) if ref_id + if ref_id.size == 0 + ref_id = children.to_s.downcase.gsub(' ','_') + else + ref_id = ref_id.downcase + end con.push_element md_link(children, ref_id) else maruku_error "Could not read ref_id", src, con @@ -566,7 +571,8 @@ module MaRuKu; module In; module Markdown; module SpanLevelParser return end else # empty [link] - con.push_element md_link(children, "") + id = children.to_s.downcase.gsub(' ','_') + con.push_element md_link(children, id) end end # read link diff --git a/lib/maruku/input/type_detection.rb b/lib/maruku/input/type_detection.rb index db0b7bbf..b2985021 100644 --- a/lib/maruku/input/type_detection.rb +++ b/lib/maruku/input/type_detection.rb @@ -44,11 +44,12 @@ module MaRuKu; module Strings return :definition if l =~ Definition # I had a bug with emails and urls at the beginning of the # line that were mistaken for raw_html - return :text if l=~EMailAddress or l=~ URL + return :text if l=~ /^#{EMailAddress}/ + return :text if l=~ /^/ - URL = /^:all}) @@ -121,6 +165,7 @@ xhtml11_mathml2_svg11 = # me.attributes['content'] = 'text/html;charset=utf-8' me.attributes['content'] = 'application/xhtml+xml;charset=utf-8' + # Create title element doc_title = self.attributes[:title] || self.attributes[:subject] || "" title = Element.new 'title', head @@ -243,6 +288,34 @@ xhtml11_mathml2_svg11 = m end +=begin maruku_doc +Attribute: id +Scope: element +Output: LaTeX, HTML + +It is copied as a standard HTML attribute. + +Moreover, it used as a label name for hyperlinks in both HTML and +in PDF. + +=end + +=begin maruku_doc +Attribute: class +Scope: element +Output: HTML + +It is copied as a standard HTML attribute. +=end + +=begin maruku_doc +Attribute: style +Scope: element +Output: HTML + +It is copied as a standard HTML attribute. +=end + StandardAttributes = [:id, :style, :class] def create_html_element(name, attributes_to_copy=[]) m = Element.new name @@ -272,9 +345,20 @@ xhtml11_mathml2_svg11 = def to_html_strong; wrap_as_element('strong') end def to_html_emphasis; wrap_as_element('em') end +=begin maruku_doc +Attribute: use_numbered_headers +Scope: document +Summary: Activates the numbering of headers. + +If `true`, section headers will be numbered. + +In LaTeX export, the numbering of headers is managed +by Maruku, to have the same results in both HTML and LaTeX. +=end + # nil if not applicable, else string def section_number - return nil if not @doc.attributes[:use_numbered_headers] + return nil if not get_setting(:use_numbered_headers) n = @attributes[:section_number] if n && (not n.empty?) @@ -315,14 +399,35 @@ xhtml11_mathml2_svg11 = =begin maruku_doc Attribute: html_use_syntax -Scope: document -Output: html +Scope: global, document, element +Output: HTML Summary: Enables the use of the `syntax` package. Related: lang, code_lang -Default: +Default: + +If true, the `syntax` package is used. It supports the `ruby` and `xml` +languages. Remember to set the `lang` attribute of the code block. + +Examples: + + require 'maruku' + {:lang=ruby html_use_syntax=true} + +and + +
Div
+ {:lang=html html_use_syntax=true} + +produces: + + require 'maruku' +{:lang=ruby html_use_syntax=true} + +and + +
Div
+{:lang=html html_use_syntax=true} -If false, Maruku does not append a signature to the -generated file. =end def to_html_code; @@ -378,6 +483,26 @@ generated file. element end +=begin maruku_doc +Attribute: code_background_color +Scope: global, document, element +Summary: Background color for code blocks. + +The format is either a named color (`green`, `red`) or a CSS color +of the form `#ff00ff`. + +* for **HTML output**, the value is put straight in the `background-color` CSS + property of the block. + +* for **LaTeX output**, if it is a named color, it must be a color accepted + by the LaTeX `color` packages. If it is of the form `#ff00ff`, Maruku + defines a color using the `\color[rgb]{r,g,b}` macro. + + For example, for `#0000ff`, the macro is called as: `\color[rgb]{0,0,1}`. + +=end + + def to_html_code_using_pre(source) pre = create_html_element 'pre' code = Element.new 'code', pre @@ -427,11 +552,6 @@ generated file. def to_html_link a = wrap_as_element 'a' id = self.ref_id - # if empty, use text - if id.size == 0 - id = children.to_s.downcase.gsub(' ','_') - - end if ref = @doc.refs[id] url = ref[:url] diff --git a/lib/maruku/output/to_latex.rb b/lib/maruku/output/to_latex.rb index 84d24e44..fe9df62c 100644 --- a/lib/maruku/output/to_latex.rb +++ b/lib/maruku/output/to_latex.rb @@ -88,7 +88,7 @@ while the default is to add this: =end - encoding = @doc.attributes[:latex_cjk] ? + encoding = get_setting(:latex_cjk) ? Latex_preamble_enc_cjk : Latex_preamble_enc_utf8 =begin maruku_doc @@ -190,9 +190,31 @@ Admissible formats: end end +=begin maruku_doc +Attribute: code_show_spaces +Scope: global, document, element + +If `true`, shows spaces and tabs in code blocks. + +Example: + + One space + Two spaces + Tab, space, tab + Tab, tab, tab and all is green! + {:code_show_spaces code_background_color=#ffeedd} +{:markdown} + +That will produce: + + One space + Two spaces + Tab, space, tab + Tab, tab, tab and all is green! +{:code_show_spaces code_background_color=#ffeedd} + +=end - def to_latex_code; - raw_code = self.raw_code =begin maruku_doc Attribute: latex_use_listings Scope: document @@ -213,21 +235,24 @@ Otherwise, a standard `verbatim` environment is used. Please refer to the documentation of the `listings` package for supported languages. - + If a language is not supported, the `listings` package will emit a warning during the compilation. Just press enter and nothing wrong will happen. * If the `code_show_spaces` is specified, than spaces and tabs will be shown using the macro: - + \lstset{showspaces=true,showtabs=true} - + * The background color is given by `code_background_color`. =end + + def to_latex_code; + raw_code = self.raw_code - if @doc.attributes[:latex_use_listings] + if get_setting(:latex_use_listings) @doc.latex_require_package('listings') s = "\\lstset{columns=fixed,frame=shadowbox}" @@ -314,7 +339,7 @@ Otherwise, a standard `verbatim` environment is used. \\end{#{name}}\n" end - SAFE_CHARS = Set.new([?\ ] + (?a..?z).to_a + (?A..?Z).to_a) + SAFE_CHARS = Set.new((?a..?z).to_a + (?A..?Z).to_a) # the ultimate escaping # (is much better than using \verb) def latex_escape(source) @@ -345,7 +370,6 @@ Otherwise, a standard `verbatim` environment is used. end def to_latex_immediate_link - a = create_html_element 'a' url = self.url text = url.gsub(/^mailto:/,'') # don't show mailto # gsub('~','$\sim$') @@ -372,11 +396,6 @@ Otherwise, a standard `verbatim` environment is used. def to_latex_link id = self.ref_id - # if empty, use text - if id.size == 0 - id = children.to_s.downcase - end - ref = @doc.refs[id] if not ref $stderr.puts "Could not find id = '#{id}'" diff --git a/lib/maruku/tests/new_parser.rb b/lib/maruku/tests/new_parser.rb index 6374398a..873def15 100644 --- a/lib/maruku/tests/new_parser.rb +++ b/lib/maruku/tests/new_parser.rb @@ -152,10 +152,10 @@ module MaRuKu; module Tests ["\\[a\\]", ["[a]"], 'Escaping 2'], # This is valid in the new Markdown version # ["[a]", ["a"], 'Not a link'], - ["[a]", [ md_link(["a"],'')], 'Empty link'], + ["[a]", [ md_link(["a"],'a')], 'Empty link'], ["[a][]", ], - ["[a][]b", [ md_link(["a"],''),'b'], 'Empty link'], - ["[a\\]][]", [ md_link(["a]"],'')], 'Escape inside link'], + ["[a][]b", [ md_link(["a"],'a'),'b'], 'Empty link'], + ["[a\\]][]", [ md_link(["a]"],'a]')], 'Escape inside link'], ["[a", :throw, 'Link not closed'], ["[a][", :throw, 'Ref not closed'], diff --git a/lib/maruku/version.rb b/lib/maruku/version.rb index c8749b27..1c1ac8a3 100644 --- a/lib/maruku/version.rb +++ b/lib/maruku/version.rb @@ -19,7 +19,7 @@ #++ module MaRuKu - Version = '0.4.2.1' + Version = '0.5.0' MarukuURL = 'http://maruku.rubyforge.org/'