diff --git a/lib/chunks/category.rb b/lib/chunks/category.rb new file mode 100644 index 00000000..d08d8636 --- /dev/null +++ b/lib/chunks/category.rb @@ -0,0 +1,33 @@ +require 'chunks/chunk' + +# The category chunk looks for "category: news" on a line by +# itself and parses the terms after the ':' as categories. +# Other classes can search for Category chunks within +# rendered content to find out what categories this page +# should be in. +# +# Category lines can be hidden using ':category: news', for example +class Category < Chunk::Abstract + CATEGORY_PATTERN = /^(:)?category\s*:(.*)$/i + def self.pattern() CATEGORY_PATTERN end + + attr_reader :hidden, :list + +def initialize(match_data, content) + super(match_data, content) + @hidden = match_data[1] + @list = match_data[2].split(',').map { |c| c.strip } + @unmask_text = '' + if @hidden + @unmask_text = '' + else + category_urls = @list.map { |category| url(category) }.join(', ') + @unmask_text = '
category: ' + category_urls + '
' + end + end + + # TODO move presentation of page metadata to controller/view + def url(category) + %{#{category}} + end +end diff --git a/lib/chunks/chunk.rb b/lib/chunks/chunk.rb new file mode 100644 index 00000000..9ba3cc04 --- /dev/null +++ b/lib/chunks/chunk.rb @@ -0,0 +1,86 @@ +require 'uri/common' + +# A chunk is a pattern of text that can be protected +# and interrogated by a renderer. Each Chunk class has a +# +pattern+ that states what sort of text it matches. +# Chunks are initalized by passing in the result of a +# match by its pattern. + +module Chunk + class Abstract + + # automatically construct the array of derivatives of Chunk::Abstract + @derivatives = [] + + class << self + attr_reader :derivatives + end + + def self::inherited( klass ) + Abstract::derivatives << klass + end + + # the class name part of the mask strings + def self.mask_string + self.to_s.delete(':').downcase + end + + # a regexp that matches all chunk_types masks + def Abstract::mask_re(chunk_types) + tmp = chunk_types.map{|klass| klass.mask_string}.join("|") + Regexp.new("chunk([0-9a-f]+n\\d+)(#{tmp})chunk") + end + + attr_reader :text, :unmask_text, :unmask_mode + + def initialize(match_data, content) + @text = match_data[0] + @content = content + @unmask_mode = :normal + end + + # Find all the chunks of the given type in content + # Each time the pattern is matched, create a new + # chunk for it, and replace the occurance of the chunk + # in this content with its mask. 
+ def self.apply_to(content) + content.gsub!( self.pattern ) do |match| + new_chunk = self.new($~, content) + content.add_chunk(new_chunk) + new_chunk.mask + end + end + + # should contain only [a-z0-9] + def mask + @mask ||="chunk#{@id}#{self.class.mask_string}chunk" + end + + # We should not use object_id because object_id is not guarantied + # to be unique when we restart the wiki (new object ids can equal old ones + # that were restored from madeleine storage) + def id + @id ||= "#{@content.page_id}n#{@content.chunk_id}" + end + + def unmask + @content.sub!(mask, @unmask_text) + end + + def rendered? + @unmask_mode == :normal + end + + def escaped? + @unmask_mode == :escape + end + + def revert + @content.sub!(mask, @text) + # unregister + @content.delete_chunk(self) + end + + end + +end diff --git a/lib/chunks/engines.rb b/lib/chunks/engines.rb new file mode 100644 index 00000000..fe5a96a8 --- /dev/null +++ b/lib/chunks/engines.rb @@ -0,0 +1,61 @@ +$: << File.dirname(__FILE__) + "../../lib" + +require 'redcloth' +require 'bluecloth_tweaked' +require 'rdocsupport' +require 'chunks/chunk' + +# The markup engines are Chunks that call the one of RedCloth +# or RDoc to convert text. This markup occurs when the chunk is required +# to mask itself. +module Engines + class AbstractEngine < Chunk::Abstract + + # Create a new chunk for the whole content and replace it with its mask. 
+ def self.apply_to(content) + new_chunk = self.new(content) + content.replace(new_chunk.mask) + end + + private + + # Never create engines by constructor - use apply_to instead + def initialize(content) + @content = content + end + + end + + class Textile < AbstractEngine + def mask + redcloth = RedCloth.new(@content, [:hard_breaks] + @content.options[:engine_opts]) + redcloth.filter_html = false + redcloth.no_span_caps = false + redcloth.to_html(:textile) + end + end + + class Markdown < AbstractEngine + def mask + BlueCloth.new(@content, @content.options[:engine_opts]).to_html + end + end + + class Mixed < AbstractEngine + def mask + redcloth = RedCloth.new(@content, @content.options[:engine_opts]) + redcloth.filter_html = false + redcloth.no_span_caps = false + redcloth.to_html + end + end + + class RDoc < AbstractEngine + def mask + RDocSupport::RDocFormatter.new(@content).to_html + end + end + + MAP = { :textile => Textile, :markdown => Markdown, :mixed => Mixed, :rdoc => RDoc } + MAP.default = Textile +end diff --git a/lib/chunks/include.rb b/lib/chunks/include.rb new file mode 100644 index 00000000..370093cc --- /dev/null +++ b/lib/chunks/include.rb @@ -0,0 +1,41 @@ +require 'chunks/wiki' + +# Includes the contents of another page for rendering. +# The include command looks like this: "[[!include PageName]]". +# It is a WikiReference since it refers to another page (PageName) +# and the wiki content using this command must be notified +# of changes to that page. +# If the included page could not be found, a warning is displayed. 
+ +class Include < WikiChunk::WikiReference + + INCLUDE_PATTERN = /\[\[!include\s+(.*?)\]\]\s*/i + def self.pattern() INCLUDE_PATTERN end + + + def initialize(match_data, content) + super + @page_name = match_data[1].strip + @unmask_text = get_unmask_text_avoiding_recursion_loops + end + + private + + def get_unmask_text_avoiding_recursion_loops + if refpage then + refpage.clear_display_cache + if refpage.wiki_includes.include?(@content.page_name) + # this will break the recursion + @content.delete_chunk(self) + return "Recursive include detected; #{@page_name} --> #{@content.page_name} " + + "--> #{@page_name}\n" + else + @content.merge_chunks(refpage.display_content) + return refpage.display_content.pre_rendered + end + else + return "Could not include #{@page_name}\n" + end + end + +end diff --git a/lib/chunks/literal.rb b/lib/chunks/literal.rb new file mode 100644 index 00000000..09da4005 --- /dev/null +++ b/lib/chunks/literal.rb @@ -0,0 +1,31 @@ +require 'chunks/chunk' + +# These are basic chunks that have a pattern and can be protected. +# They are used by rendering process to prevent wiki rendering +# occuring within literal areas such as and
 blocks
+# and within HTML tags.
+module Literal
+
+  class AbstractLiteral < Chunk::Abstract
+
+    def initialize(match_data, content)
+      super
+      @unmask_text = @text
+    end
+
+  end
+
+  # A literal chunk that protects 'code' and 'pre' tags from wiki rendering.
+  class Pre < AbstractLiteral
+    PRE_BLOCKS = "a|pre|code"
+    PRE_PATTERN = Regexp.new('<('+PRE_BLOCKS+')\b[^>]*?>.*?', Regexp::MULTILINE)
+    def self.pattern() PRE_PATTERN end
+  end 
+
+  # A literal chunk that protects HTML tags from wiki rendering.
+  class Tags < AbstractLiteral
+    TAGS = "a|img|em|strong|div|span|table|td|th|ul|ol|li|dl|dt|dd"
+    TAGS_PATTERN = Regexp.new('<(?:'+TAGS+')[^>]*?>', Regexp::MULTILINE) 
+    def self.pattern() TAGS_PATTERN  end
+  end
+end
diff --git a/lib/chunks/nowiki.rb b/lib/chunks/nowiki.rb
new file mode 100644
index 00000000..ef99ec0b
--- /dev/null
+++ b/lib/chunks/nowiki.rb
@@ -0,0 +1,28 @@
+require 'chunks/chunk'
+
+# This chunks allows certain parts of a wiki page to be hidden from the
+# rest of the rendering pipeline. It should be run at the beginning
+# of the pipeline in `wiki_content.rb`.
+#
+# An example use of this chunk is to markup double brackets or
+# auto URI links:
+#  Here are [[double brackets]] and a URI: www.uri.org
+#
+# The contents of the chunks will not be processed by any other chunk
+# so the `www.uri.org` and the double brackets will appear verbatim.
+#
+# Author: Mark Reid 
+# Created: 8th June 2004
+class NoWiki < Chunk::Abstract
+
+  NOWIKI_PATTERN = Regexp.new('(.*?)', Regexp::MULTILINE)
+  def self.pattern() NOWIKI_PATTERN end
+
+  attr_reader :plain_text
+
+  def initialize(match_data, content)
+    super
+    @plain_text = @unmask_text = match_data[1]
+  end
+
+end
diff --git a/lib/chunks/test.rb b/lib/chunks/test.rb
new file mode 100644
index 00000000..edf77d14
--- /dev/null
+++ b/lib/chunks/test.rb
@@ -0,0 +1,18 @@
+require 'test/unit'
+
+class ChunkTest < Test::Unit::TestCase
+
+  # Asserts a number of tests for the given type and text.
+  def match(type, test_text, expected)
+	pattern = type.pattern
+    assert_match(pattern, test_text)
+    pattern =~ test_text   # Previous assertion guarantees match
+    chunk = type.new($~)
+    
+    # Test if requested parts are correct.
+    for method_sym, value in expected do
+      assert_respond_to(chunk, method_sym)
+      assert_equal(value, chunk.method(method_sym).call, "Checking value of '#{method_sym}'")
+    end
+  end
+end
diff --git a/lib/chunks/uri.rb b/lib/chunks/uri.rb
new file mode 100644
index 00000000..1a208535
--- /dev/null
+++ b/lib/chunks/uri.rb
@@ -0,0 +1,182 @@
+require 'chunks/chunk'
+
+# This wiki chunk matches arbitrary URIs, using patterns from the Ruby URI modules.
+# It parses out a variety of fields that could be used by renderers to format
+# the links in various ways (shortening domain names, hiding email addresses)
+# It matches email addresses and host.com.au domains without schemes (http://)
+# but adds these on as required.
+#
+# The heuristic used to match a URI is designed to err on the side of caution.
+# That is, it is more likely to not autolink a URI than it is to accidentally
+# autolink something that is not a URI. The reason behind this is it is easier
+# to force a URI link by prefixing 'http://' to it than it is to escape an
+# incorrectly marked up non-URI.
+#
+# I'm using a part of the [ISO 3166-1 Standard][iso3166] for country name suffixes.
+# (The generic names are from www.bnoack.com/data/countrycode2.html)
+#   [iso3166]: http://geotags.com/iso3166/
+
+class URIChunk < Chunk::Abstract
+  include URI::REGEXP::PATTERN
+
+  # this condition is to get rid of pesky warnings in tests
+  unless defined? URIChunk::INTERNET_URI_REGEXP
+
+    GENERIC = 'aero|biz|com|coop|edu|gov|info|int|mil|museum|name|net|org'
+    
+    COUNTRY = 'ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|az|ba|bb|bd|be|' + 
+      'bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cf|cd|cg|ch|ci|ck|cl|' + 
+      'cm|cn|co|cr|cs|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|fi|' + 
+      'fj|fk|fm|fo|fr|fx|ga|gb|gd|ge|gf|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|' + 
+      'hk|hm|hn|hr|ht|hu|id|ie|il|in|io|iq|ir|is|it|jm|jo|jp|ke|kg|kh|ki|km|kn|' + 
+      'kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|mg|mh|mk|ml|mm|' + 
+      'mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nt|' + 
+      'nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|pt|pw|py|qa|re|ro|ru|rw|sa|sb|sc|' + 
+      'sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|' + 
+      'tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|' + 
+      'ws|ye|yt|yu|za|zm|zr|zw'
+    # These are needed otherwise HOST will match almost anything
+    TLDS = "(?:#{GENERIC}|#{COUNTRY})"
+    
+    # Redefine USERINFO so that it must have non-zero length
+    USERINFO = "(?:[#{UNRESERVED};:&=+$,]|#{ESCAPED})+"
+  
+    # unreserved_no_ending = alphanum | mark, but URI_ENDING [)!] excluded
+    UNRESERVED_NO_ENDING = "-_.~*'(#{ALNUM}"  
+
+    # this ensures that query or fragment do not end with URI_ENDING
+    # and enable us to use a much simpler self.pattern Regexp
+
+    # uric_no_ending = reserved | unreserved_no_ending | escaped
+    URIC_NO_ENDING = "(?:[#{UNRESERVED_NO_ENDING}#{RESERVED}]|#{ESCAPED})"
+    # query = *uric
+    QUERY = "#{URIC_NO_ENDING}*"
+    # fragment = *uric
+    FRAGMENT = "#{URIC_NO_ENDING}*"
+
+    # DOMLABEL is defined in the ruby uri library, TLDS is defined above
+    INTERNET_HOSTNAME = "(?:#{DOMLABEL}\\.)+#{TLDS}" 
+
+    # Correct a typo bug in ruby 1.8.x lib/uri/common.rb 
+    PORT = '\\d*'
+
+    INTERNET_URI =
+        "(?:(#{SCHEME}):/{0,2})?" +   # Optional scheme:        (\1)
+        "(?:(#{USERINFO})@)?" +       # Optional userinfo@      (\2)
+        "(#{INTERNET_HOSTNAME})" +    # Mandatory hostname      (\3)
+        "(?::(#{PORT}))?" +           # Optional :port          (\4)
+        "(#{ABS_PATH})?"  +           # Optional absolute path  (\5)
+        "(?:\\?(#{QUERY}))?" +        # Optional ?query         (\6)
+        "(?:\\#(#{FRAGMENT}))?"  +    # Optional #fragment      (\7)
+        '(?=\.?(?:\s|\)|\z))'         # ends only with optional dot + space or ")" 
+                                      # or end of the string
+
+    SUSPICIOUS_PRECEDING_CHARACTER = '(!|\"\:|\"|\\\'|\]\()?'  # any of !, ":, ", ', ](
+  
+    INTERNET_URI_REGEXP = 
+        Regexp.new(SUSPICIOUS_PRECEDING_CHARACTER + INTERNET_URI, Regexp::EXTENDED, 'N')
+
+  end
+
+  def URIChunk.pattern
+    INTERNET_URI_REGEXP
+  end
+
+  attr_reader :user, :host, :port, :path, :query, :fragment, :link_text
+  
+  def self.apply_to(content)
+    content.gsub!( self.pattern ) do |matched_text|
+      chunk = self.new($~, content)
+      if chunk.avoid_autolinking?
+        # do not substitute nor register the chunk
+        matched_text
+      else
+        content.add_chunk(chunk)
+        chunk.mask
+      end
+    end
+  end
+
+  def initialize(match_data, content)
+    super
+    @link_text = match_data[0]
+    @suspicious_preceding_character = match_data[1]
+    @original_scheme, @user, @host, @port, @path, @query, @fragment = match_data[2..-1]
+    treat_trailing_character
+    @unmask_text = "#{link_text}"
+  end
+
+  def avoid_autolinking?
+    not @suspicious_preceding_character.nil?
+  end
+
+  def treat_trailing_character
+    # If the last character matched by URI pattern is in ! or ), this may be part of the markup,
+    # not a URL. We should handle it as such. It is possible to do it by a regexp, but 
+    # much easier to do programmatically
+    last_char = @link_text[-1..-1]
+    if last_char == ')' or last_char == '!'
+      @trailing_punctuation = last_char
+      @link_text.chop!
+      [@original_scheme, @user, @host, @port, @path, @query, @fragment].compact.last.chop!
+    else 
+      @trailing_punctuation = nil
+    end
+  end
+
+  def scheme
+    @original_scheme or (@user ? 'mailto' : 'http')
+  end
+
+  def scheme_delimiter
+    scheme == 'mailto' ? ':' : '://'
+  end
+
+  def user_delimiter
+     '@' unless @user.nil?
+  end
+
+  def port_delimiter
+     ':' unless @port.nil?
+  end
+
+  def query_delimiter
+     '?' unless @query.nil?
+  end
+
+  def uri
+    [scheme, scheme_delimiter, user, user_delimiter, host, port_delimiter, port, path, 
+      query_delimiter, query].compact.join
+  end
+
+end
+
+# uri with mandatory scheme but less restrictive hostname, like
+# http://localhost:2500/blah.html
+class LocalURIChunk < URIChunk
+
+  unless defined? LocalURIChunk::LOCAL_URI_REGEXP
+    # hostname can be just a simple word like 'localhost'
+    ANY_HOSTNAME = "(?:#{DOMLABEL}\\.)*#{TOPLABEL}\\.?"
+    
+    # The basic URI expression as a string
+    # Scheme and hostname are mandatory
+    LOCAL_URI =
+        "(?:(#{SCHEME})://)+" +       # Mandatory scheme://     (\1)
+        "(?:(#{USERINFO})@)?" +       # Optional userinfo@      (\2)
+        "(#{ANY_HOSTNAME})" +         # Mandatory hostname      (\3)
+        "(?::(#{PORT}))?" +           # Optional :port          (\4)
+        "(#{ABS_PATH})?"  +           # Optional absolute path  (\5)
+        "(?:\\?(#{QUERY}))?" +        # Optional ?query         (\6)
+        "(?:\\#(#{FRAGMENT}))?" +     # Optional #fragment      (\7)
+        '(?=\.?(?:\s|\)|\z))'         # ends only with optional dot + space or ")" 
+                                      # or end of the string
+  
+    LOCAL_URI_REGEXP = Regexp.new(SUSPICIOUS_PRECEDING_CHARACTER + LOCAL_URI, Regexp::EXTENDED, 'N')
+  end
+
+  def LocalURIChunk.pattern
+    LOCAL_URI_REGEXP
+  end
+
+end
diff --git a/lib/chunks/wiki.rb b/lib/chunks/wiki.rb
new file mode 100644
index 00000000..840f644a
--- /dev/null
+++ b/lib/chunks/wiki.rb
@@ -0,0 +1,141 @@
+require 'wiki_words'
+require 'chunks/chunk'
+require 'chunks/wiki'
+require 'cgi'
+
+# Contains all the methods for finding and replacing wiki related links.
+module WikiChunk
+  include Chunk
+
+  # A wiki reference is the top-level class for anything that refers to
+  # another wiki page.
+  class WikiReference < Chunk::Abstract
+
+    # Name of the referenced page
+    attr_reader :page_name
+    
+    # the referenced page
+    def refpage
+      @content.web.pages[@page_name]
+    end
+  
+  end
+
+  # A wiki link is the top-level class for links that refers to
+  # another wiki page.
+  class WikiLink < WikiReference
+ 
+    attr_reader :link_text, :link_type
+
+    def initialize(match_data, content)
+      super
+      @link_type = :show
+    end
+
+    def self.apply_to(content)
+      content.gsub!( self.pattern ) do |matched_text|
+        chunk = self.new($~, content)
+        if chunk.textile_url?
+          # do not substitute
+          matched_text
+        else
+          content.add_chunk(chunk)
+          chunk.mask
+        end
+      end
+    end
+
+    # the referenced page
+    def refpage
+      @content.web.pages[@page_name]
+    end
+
+    def textile_url?
+      not @textile_link_suffix.nil?
+    end
+
+  end
+
+  # This chunk matches a WikiWord. WikiWords can be escaped
+  # by prepending a '\'. When this is the case, the +escaped_text+
+  # method will return the WikiWord instead of the usual +nil+.
+  # The +page_name+ method returns the matched WikiWord.
+  class Word < WikiLink
+
+    attr_reader :escaped_text
+    
+    unless defined? WIKI_WORD
+      WIKI_WORD = Regexp.new('(":)?(\\\\)?(' + WikiWords::WIKI_WORD_PATTERN + ')\b', 0, "utf-8")
+    end
+
+    def self.pattern
+      WIKI_WORD
+    end
+
+    def initialize(match_data, content)
+      super
+      @textile_link_suffix, @escape, @page_name = match_data[1..3]
+      if @escape 
+        @unmask_mode = :escape
+        @escaped_text = @page_name
+      else
+        @escaped_text = nil
+      end
+      @link_text = WikiWords.separate(@page_name)
+      @unmask_text = (@escaped_text || @content.page_link(@page_name, @link_text, @link_type))
+    end
+
+  end
+
+  # This chunk handles [[bracketted wiki words]] and 
+  # [[AliasedWords|aliased wiki words]]. The first part of an
+  # aliased wiki word must be a WikiWord. If the WikiWord
+  # is aliased, the +link_text+ field will contain the
+  # alias, otherwise +link_text+ will contain the entire
+  # contents within the double brackets.
+  #
+  # NOTE: This chunk must be tested before WikiWord since
+  #       a WikiWords can be a substring of a WikiLink. 
+  class Link < WikiLink
+    
+    unless defined? WIKI_LINK
+      WIKI_LINK = /(":)?\[\[\s*([^\]\s][^\]]+?)\s*\]\]/
+      LINK_TYPE_SEPARATION = Regexp.new('^(.+):((file)|(pic))$', 0, 'utf-8')
+      ALIAS_SEPARATION = Regexp.new('^(.+)\|(.+)$', 0, 'utf-8')
+    end    
+        
+    def self.pattern() WIKI_LINK end
+
+    def initialize(match_data, content)
+      super
+      @textile_link_suffix, @page_name = match_data[1..2]
+      @link_text = @page_name
+      separate_link_type
+      separate_alias
+      @unmask_text = @content.page_link(@page_name, @link_text, @link_type)
+    end
+
+    private
+
+    # if link wihin the brackets has a form of [[filename:file]] or [[filename:pic]], 
+    # this means a link to a picture or a file
+    def separate_link_type
+      link_type_match = LINK_TYPE_SEPARATION.match(@page_name)
+      if link_type_match
+        @link_text = @page_name = link_type_match[1]
+        @link_type = link_type_match[2..3].compact[0].to_sym
+      end
+    end
+
+    # link text may be different from page name. this will look like [[actual page|link text]]
+    def separate_alias
+      alias_match = ALIAS_SEPARATION.match(@page_name)
+      if alias_match
+        @page_name, @link_text = alias_match[1..2]
+      end
+      # note that [[filename|link text:file]] is also supported
+    end  
+  
+  end
+  
+end