added chunks

2005-08-09 02:07:39 +00:00 · 2005-08-09 02:07:39 +00:00 · c4b7b2d9f2
commit c4b7b2d9f2
parent 8c331d1019
9 changed files with 621 additions and 0 deletions
--- a/lib/chunks/category.rb
+++ b/lib/chunks/category.rb
@ -0,0 +1,33 @@
+require 'chunks/chunk'
+
+# The category chunk looks for "category: news" on a line by
+# itself and parses the terms after the ':' as categories.
+# Other classes can search for Category chunks within
+# rendered content to find out what categories this page
+# should be in.
+#
+# Category lines can be hidden using ':category: news', for example
+class Category < Chunk::Abstract
+  CATEGORY_PATTERN = /^(:)?category\s*:(.*)$/i
+  def self.pattern() CATEGORY_PATTERN  end
+
+  attr_reader :hidden, :list
+
+def initialize(match_data, content)
+    super(match_data, content)
+    @hidden = match_data[1]
+    @list = match_data[2].split(',').map { |c| c.strip }
+    @unmask_text = ''
+    if @hidden
+      @unmask_text = ''
+    else
+      category_urls = @list.map { |category| url(category) }.join(', ')
+      @unmask_text = '<div class="property"> category: ' + category_urls + '</div>'
+    end
+  end
+
+  # TODO move presentation of page metadata to controller/view
+  def url(category)
+    %{<a class="category_link" href="../list/?category=#{category}">#{category}</a>}
+  end
+end
--- a/lib/chunks/chunk.rb
+++ b/lib/chunks/chunk.rb
@ -0,0 +1,86 @@
+require 'uri/common'
+
+# A chunk is a pattern of text that can be protected
+# and interrogated by a renderer. Each Chunk class has a 
+# +pattern+ that states what sort of text it matches.
+# Chunks are initialized by passing in the result of a
+# match by its pattern. 
+
+module Chunk
+  class Abstract
+
+    # automatically construct the array of derivatives of Chunk::Abstract
+    @derivatives = [] 
+
+    class << self 
+      attr_reader :derivatives 
+    end 
+    
+    def self::inherited( klass ) 
+      Abstract::derivatives << klass 
+    end 
+    
+    # the class name part of the mask strings
+    def self.mask_string
+      self.to_s.delete(':').downcase
+    end
+
+    # a regexp that matches all chunk_types masks
+    def Abstract::mask_re(chunk_types)
+      tmp = chunk_types.map{|klass| klass.mask_string}.join("|")
+      Regexp.new("chunk([0-9a-f]+n\\d+)(#{tmp})chunk")
+    end
+    
+    attr_reader :text, :unmask_text, :unmask_mode
+
+    def initialize(match_data, content) 
+      @text = match_data[0] 
+      @content = content
+      @unmask_mode = :normal
+    end
+
+    # Find all the chunks of the given type in content
+    # Each time the pattern is matched, create a new
+    # chunk for it, and replace the occurance of the chunk
+    # in this content with its mask.
+	def self.apply_to(content)
+	  content.gsub!( self.pattern ) do |match|	
+        new_chunk = self.new($~, content)
+        content.add_chunk(new_chunk)
+        new_chunk.mask
+      end
+    end
+
+    # should contain only [a-z0-9]
+    def mask
+      @mask ||="chunk#{@id}#{self.class.mask_string}chunk"
+    end
+
+    # We should not use object_id because object_id is not guarantied 
+    # to be unique when we restart the wiki (new object ids can equal old ones
+    # that were restored from madeleine storage)  
+    def id
+      @id ||= "#{@content.page_id}n#{@content.chunk_id}"
+    end
+
+    def unmask
+      @content.sub!(mask, @unmask_text)
+    end
+
+    def rendered?
+      @unmask_mode == :normal
+    end
+
+    def escaped?
+      @unmask_mode == :escape
+    end
+
+    def revert
+      @content.sub!(mask, @text)
+      # unregister
+      @content.delete_chunk(self)
+    end
+
+  end
+
+end
--- a/lib/chunks/engines.rb
+++ b/lib/chunks/engines.rb
@ -0,0 +1,61 @@
+# Make the application's lib directory loadable. The separator in front of
+# "../../lib" is required: without it the relative part is appended directly
+# to the name of this file's directory and the resulting path does not exist.
+$: << File.dirname(__FILE__) + "/../../lib"
+
+require 'redcloth'
+require 'bluecloth_tweaked'
+require 'rdocsupport'
+require 'chunks/chunk'
+
+# The markup engines are Chunks that call one of RedCloth, BlueCloth
+# or RDoc to convert text. This markup occurs when the chunk is required
+# to mask itself.
+module Engines
+  class AbstractEngine < Chunk::Abstract
+
+    # Create a new chunk for the whole content and replace it with its mask.
+    def self.apply_to(content)
+      new_chunk = self.new(content)
+      content.replace(new_chunk.mask)
+    end
+
+    private 
+
+    # Never create engines by constructor - use apply_to instead
+    def initialize(content) 
+      @content = content
+    end
+
+  end
+
+  class Textile < AbstractEngine
+    def mask
+      redcloth = RedCloth.new(@content, [:hard_breaks] + @content.options[:engine_opts])
+      redcloth.filter_html = false
+      redcloth.no_span_caps = false  
+      redcloth.to_html(:textile)
+    end
+  end
+
+  class Markdown < AbstractEngine
+    def mask
+      BlueCloth.new(@content, @content.options[:engine_opts]).to_html
+    end
+  end
+
+  class Mixed < AbstractEngine
+    def mask
+      redcloth = RedCloth.new(@content, @content.options[:engine_opts])
+      redcloth.filter_html = false
+      redcloth.no_span_caps = false
+      redcloth.to_html
+    end
+  end
+
+  class RDoc < AbstractEngine
+    def mask
+      RDocSupport::RDocFormatter.new(@content).to_html
+    end
+  end
+
+  MAP = { :textile => Textile, :markdown => Markdown, :mixed => Mixed, :rdoc => RDoc }
+  MAP.default = Textile
+end
--- a/lib/chunks/include.rb
+++ b/lib/chunks/include.rb
@ -0,0 +1,41 @@
+require 'chunks/wiki'
+
+# Includes the contents of another page for rendering.
+# The include command looks like this: "[[!include PageName]]".
+# It is a WikiReference since it refers to another page (PageName)
+# and the wiki content using this command must be notified
+# of changes to that page.
+# If the included page could not be found, a warning is displayed.
+
+class Include < WikiChunk::WikiReference
+
+  INCLUDE_PATTERN = /\[\[!include\s+(.*?)\]\]\s*/i
+  def self.pattern() INCLUDE_PATTERN end
+
+
+  def initialize(match_data, content)
+    super
+    @page_name = match_data[1].strip
+    @unmask_text = get_unmask_text_avoiding_recursion_loops
+  end
+
+  private
+  
+  def get_unmask_text_avoiding_recursion_loops
+    if refpage then
+      refpage.clear_display_cache
+      if refpage.wiki_includes.include?(@content.page_name)
+        # this will break the recursion
+        @content.delete_chunk(self)
+        return "<em>Recursive include detected; #{@page_name} --> #{@content.page_name} " + 
+               "--> #{@page_name}</em>\n"
+      else
+        @content.merge_chunks(refpage.display_content)
+        return refpage.display_content.pre_rendered 
+      end
+    else
+      return "<em>Could not include #{@page_name}</em>\n"
+    end
+  end
+
+end
--- a/lib/chunks/literal.rb
+++ b/lib/chunks/literal.rb
@ -0,0 +1,31 @@
+require 'chunks/chunk'
+
+# These are basic chunks that have a pattern and can be protected.
+# They are used by the rendering process to prevent wiki rendering
+# occurring within literal areas such as <code> and <pre> blocks
+# and within HTML tags.
+module Literal
+  # Shared base of the literal chunks: unmasking puts the matched text back unchanged.
+  class AbstractLiteral < Chunk::Abstract
+
+    def initialize(match_data, content)
+      super
+      @unmask_text = @text  # the replacement for the mask is exactly what was matched
+    end
+
+  end
+
+  # A literal chunk that protects whole 'a', 'pre' and 'code' elements (opening tag, body and the matching closing tag) from wiki rendering.
+  class Pre < AbstractLiteral
+    PRE_BLOCKS = "a|pre|code"  # element names, joined as regexp alternatives
+    PRE_PATTERN = Regexp.new('<('+PRE_BLOCKS+')\b[^>]*?>.*?</\1>', Regexp::MULTILINE)
+    def self.pattern() PRE_PATTERN end
+  end 
+
+  # A literal chunk that protects the opening tags of the listed HTML elements from wiki rendering (the pattern requires the name right after '<' and has no word boundary after it).
+  class Tags < AbstractLiteral
+    TAGS = "a|img|em|strong|div|span|table|td|th|ul|ol|li|dl|dt|dd"
+    TAGS_PATTERN = Regexp.new('<(?:'+TAGS+')[^>]*?>', Regexp::MULTILINE) 
+    def self.pattern() TAGS_PATTERN  end
+  end
+end
--- a/lib/chunks/nowiki.rb
+++ b/lib/chunks/nowiki.rb
@ -0,0 +1,28 @@
+require 'chunks/chunk'
+
+# This chunk allows certain parts of a wiki page to be hidden from the
+# rest of the rendering pipeline. It should be run at the beginning
+# of the pipeline in `wiki_content.rb`.
+#
+# An example use of this chunk is to markup double brackets or
+# auto URI links:
+#  <nowiki>Here are [[double brackets]] and a URI: www.uri.org</nowiki>
+#
+# The contents of the chunks will not be processed by any other chunk
+# so the `www.uri.org` and the double brackets will appear verbatim.
+#
+# Author: Mark Reid <mark at threewordslong dot com>
+# Created: 8th June 2004
+class NoWiki < Chunk::Abstract
+
+  NOWIKI_PATTERN = Regexp.new('<nowiki>(.*?)</nowiki>', Regexp::MULTILINE)
+  def self.pattern() NOWIKI_PATTERN end
+
+  attr_reader :plain_text
+
+  def initialize(match_data, content)
+    super
+    @plain_text = @unmask_text = match_data[1]
+  end
+
+end
--- a/lib/chunks/test.rb
+++ b/lib/chunks/test.rb
@ -0,0 +1,18 @@
+require 'test/unit'
+
+class ChunkTest < Test::Unit::TestCase
+
+  # Asserts a number of tests for the given type and text.
+  def match(type, test_text, expected)
+	pattern = type.pattern
+    assert_match(pattern, test_text)
+    pattern =~ test_text   # Previous assertion guarantees match
+    chunk = type.new($~)
+    
+    # Test if requested parts are correct.
+    for method_sym, value in expected do
+      assert_respond_to(chunk, method_sym)
+      assert_equal(value, chunk.method(method_sym).call, "Checking value of '#{method_sym}'")
+    end
+  end
+end
--- a/lib/chunks/uri.rb
+++ b/lib/chunks/uri.rb
@ -0,0 +1,182 @@
+require 'chunks/chunk'
+
+# This wiki chunk matches arbitrary URIs, using patterns from the Ruby URI modules.
+# It parses out a variety of fields that could be used by renderers to format
+# the links in various ways (shortening domain names, hiding email addresses)
+# It matches email addresses and host.com.au domains without schemes (http://)
+# but adds these on as required.
+#
+# The heuristic used to match a URI is designed to err on the side of caution.
+# That is, it is more likely to not autolink a URI than it is to accidentally
+# autolink something that is not a URI. The reason behind this is it is easier
+# to force a URI link by prefixing 'http://' to it than it is to escape an
+# incorrectly marked up non-URI.
+#
+# I'm using a part of the [ISO 3166-1 Standard][iso3166] for country name suffixes.
+# (The generic names are from www.bnoack.com/data/countrycode2.html)
+#   [iso3166]: http://geotags.com/iso3166/
+
+class URIChunk < Chunk::Abstract
+  include URI::REGEXP::PATTERN
+
+  # this condition is to get rid of pesky warnings in tests
+  unless defined? URIChunk::INTERNET_URI_REGEXP
+
+    GENERIC = 'aero|biz|com|coop|edu|gov|info|int|mil|museum|name|net|org'
+    
+    COUNTRY = 'ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|az|ba|bb|bd|be|' + 
+      'bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cf|cd|cg|ch|ci|ck|cl|' + 
+      'cm|cn|co|cr|cs|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|fi|' + 
+      'fj|fk|fm|fo|fr|fx|ga|gb|gd|ge|gf|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|' + 
+      'hk|hm|hn|hr|ht|hu|id|ie|il|in|io|iq|ir|is|it|jm|jo|jp|ke|kg|kh|ki|km|kn|' + 
+      'kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|mg|mh|mk|ml|mm|' + 
+      'mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nt|' + 
+      'nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|pt|pw|py|qa|re|ro|ru|rw|sa|sb|sc|' + 
+      'sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|' + 
+      'tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|' + 
+      'ws|ye|yt|yu|za|zm|zr|zw'
+    # These are needed otherwise HOST will match almost anything
+    TLDS = "(?:#{GENERIC}|#{COUNTRY})"
+    
+    # Redefine USERINFO so that it must have non-zero length
+    USERINFO = "(?:[#{UNRESERVED};:&=+$,]|#{ESCAPED})+"
+  
+    # unreserved_no_ending = alphanum | mark, but URI_ENDING [)!] excluded
+    UNRESERVED_NO_ENDING = "-_.~*'(#{ALNUM}"  
+
+    # this ensures that query or fragment do not end with URI_ENDING
+    # and enable us to use a much simpler self.pattern Regexp
+
+    # uric_no_ending = reserved | unreserved_no_ending | escaped
+    URIC_NO_ENDING = "(?:[#{UNRESERVED_NO_ENDING}#{RESERVED}]|#{ESCAPED})"
+    # query = *uric
+    QUERY = "#{URIC_NO_ENDING}*"
+    # fragment = *uric
+    FRAGMENT = "#{URIC_NO_ENDING}*"
+
+    # DOMLABEL is defined in the ruby uri library, TLDS is defined above
+    INTERNET_HOSTNAME = "(?:#{DOMLABEL}\\.)+#{TLDS}" 
+
+    # Correct a typo bug in ruby 1.8.x lib/uri/common.rb 
+    PORT = '\\d*'
+
+    INTERNET_URI =
+        "(?:(#{SCHEME}):/{0,2})?" +   # Optional scheme:        (\1)
+        "(?:(#{USERINFO})@)?" +       # Optional userinfo@      (\2)
+        "(#{INTERNET_HOSTNAME})" +    # Mandatory hostname      (\3)
+        "(?::(#{PORT}))?" +           # Optional :port          (\4)
+        "(#{ABS_PATH})?"  +           # Optional absolute path  (\5)
+        "(?:\\?(#{QUERY}))?" +        # Optional ?query         (\6)
+        "(?:\\#(#{FRAGMENT}))?"  +    # Optional #fragment      (\7)
+        '(?=\.?(?:\s|\)|\z))'         # ends only with optional dot + space or ")" 
+                                      # or end of the string
+
+    SUSPICIOUS_PRECEDING_CHARACTER = '(!|\"\:|\"|\\\'|\]\()?'  # any of !, ":, ", ', ](
+  
+    INTERNET_URI_REGEXP = 
+        Regexp.new(SUSPICIOUS_PRECEDING_CHARACTER + INTERNET_URI, Regexp::EXTENDED, 'N')
+
+  end
+
+  def URIChunk.pattern
+    INTERNET_URI_REGEXP
+  end
+
+  attr_reader :user, :host, :port, :path, :query, :fragment, :link_text
+  
+  def self.apply_to(content)
+    content.gsub!( self.pattern ) do |matched_text|
+      chunk = self.new($~, content)
+      if chunk.avoid_autolinking?
+        # do not substitute nor register the chunk
+        matched_text
+      else
+        content.add_chunk(chunk)
+        chunk.mask
+      end
+    end
+  end
+
+  def initialize(match_data, content)
+    super
+    @link_text = match_data[0]
+    @suspicious_preceding_character = match_data[1]
+    @original_scheme, @user, @host, @port, @path, @query, @fragment = match_data[2..-1]
+    treat_trailing_character
+    @unmask_text = "<a href=\"#{uri}\">#{link_text}</a>"
+  end
+
+  def avoid_autolinking?
+    not @suspicious_preceding_character.nil?
+  end
+
+  def treat_trailing_character
+    # If the last character matched by URI pattern is in ! or ), this may be part of the markup,
+    # not a URL. We should handle it as such. It is possible to do it by a regexp, but 
+    # much easier to do programmatically
+    last_char = @link_text[-1..-1]
+    if last_char == ')' or last_char == '!'
+      @trailing_punctuation = last_char
+      @link_text.chop!
+      [@original_scheme, @user, @host, @port, @path, @query, @fragment].compact.last.chop!
+    else 
+      @trailing_punctuation = nil
+    end
+  end
+
+  def scheme
+    @original_scheme or (@user ? 'mailto' : 'http')
+  end
+
+  def scheme_delimiter
+    scheme == 'mailto' ? ':' : '://'
+  end
+
+  def user_delimiter
+     '@' unless @user.nil?
+  end
+
+  def port_delimiter
+     ':' unless @port.nil?
+  end
+
+  def query_delimiter
+     '?' unless @query.nil?
+  end
+
+  def uri
+    [scheme, scheme_delimiter, user, user_delimiter, host, port_delimiter, port, path, 
+      query_delimiter, query].compact.join
+  end
+
+end
+
+# uri with mandatory scheme but less restrictive hostname, like
+# http://localhost:2500/blah.html
+class LocalURIChunk < URIChunk
+  # Define the constants only once, so that evaluating this file again does not redefine them.
+  unless defined? LocalURIChunk::LOCAL_URI_REGEXP
+    # hostname can be just a simple word like 'localhost': zero or more "label." parts, a top label and an optional trailing dot
+    ANY_HOSTNAME = "(?:#{DOMLABEL}\\.)*#{TOPLABEL}\\.?"
+    # (DOMLABEL and TOPLABEL are not defined in this class; presumably they come from URI::REGEXP::PATTERN through URIChunk - confirm)
+    # The basic URI expression as a string, laid out like URIChunk::INTERNET_URI
+    # Scheme and hostname are mandatory. NOTE(review): the '+' after the scheme group also accepts a repeated "scheme://" prefix - confirm this is intended.
+    LOCAL_URI =
+        "(?:(#{SCHEME})://)+" +       # Mandatory scheme://     (\1)
+        "(?:(#{USERINFO})@)?" +       # Optional userinfo@      (\2)
+        "(#{ANY_HOSTNAME})" +         # Mandatory hostname      (\3)
+        "(?::(#{PORT}))?" +           # Optional :port          (\4)
+        "(#{ABS_PATH})?"  +           # Optional absolute path  (\5)
+        "(?:\\?(#{QUERY}))?" +        # Optional ?query         (\6)
+        "(?:\\#(#{FRAGMENT}))?" +     # Optional #fragment      (\7)
+        '(?=\.?(?:\s|\)|\z))'         # ends only with optional dot + space or ")" 
+                                      # or end of the string
+    # The suspicious-character group is prepended below, so in the compiled pattern the groups numbered above are shifted up by one.
+    LOCAL_URI_REGEXP = Regexp.new(SUSPICIOUS_PRECEDING_CHARACTER + LOCAL_URI, Regexp::EXTENDED, 'N')
+  end
+  # Pattern for this chunk type; the inherited URIChunk.apply_to reads it through self.pattern.
+  def LocalURIChunk.pattern
+    LOCAL_URI_REGEXP
+  end
+
+end
--- a/lib/chunks/wiki.rb
+++ b/lib/chunks/wiki.rb
@ -0,0 +1,141 @@
+require 'wiki_words'
+require 'chunks/chunk'
+require 'chunks/wiki'
+require 'cgi'
+
+# Contains all the methods for finding and replacing wiki related links.
+module WikiChunk
+  include Chunk
+
+  # A wiki reference is the top-level class for anything that refers to
+  # another wiki page.
+  class WikiReference < Chunk::Abstract
+
+    # Name of the referenced page
+    attr_reader :page_name
+    
+    # the referenced page
+    def refpage
+      @content.web.pages[@page_name]
+    end
+  
+  end
+
+  # A wiki link is the top-level class for links that refer to
+  # another wiki page.
+  class WikiLink < WikiReference
+ 
+    attr_reader :link_text, :link_type
+
+    def initialize(match_data, content)
+      super
+      @link_type = :show
+    end
+
+    def self.apply_to(content)
+      content.gsub!( self.pattern ) do |matched_text|
+        chunk = self.new($~, content)
+        if chunk.textile_url?
+          # do not substitute
+          matched_text
+        else
+          content.add_chunk(chunk)
+          chunk.mask
+        end
+      end
+    end
+
+    # the referenced page
+    def refpage
+      @content.web.pages[@page_name]
+    end
+
+    def textile_url?
+      not @textile_link_suffix.nil?
+    end
+
+  end
+
+  # This chunk matches a WikiWord. WikiWords can be escaped
+  # by prepending a '\'. When this is the case, the +escaped_text+
+  # method will return the WikiWord instead of the usual +nil+.
+  # The +page_name+ method returns the matched WikiWord.
+  class Word < WikiLink
+
+    attr_reader :escaped_text
+    
+    unless defined? WIKI_WORD
+      WIKI_WORD = Regexp.new('(":)?(\\\\)?(' + WikiWords::WIKI_WORD_PATTERN + ')\b', 0, "utf-8")
+    end
+
+    def self.pattern
+      WIKI_WORD
+    end
+
+    def initialize(match_data, content)
+      super
+      @textile_link_suffix, @escape, @page_name = match_data[1..3]
+      if @escape 
+        @unmask_mode = :escape
+        @escaped_text = @page_name
+      else
+        @escaped_text = nil
+      end
+      @link_text = WikiWords.separate(@page_name)
+      @unmask_text = (@escaped_text || @content.page_link(@page_name, @link_text, @link_type))
+    end
+
+  end
+
+  # This chunk handles [[bracketed wiki words]] and
+  # [[AliasedWords|aliased wiki words]]. The first part of an
+  # aliased wiki word must be a WikiWord. If the WikiWord
+  # is aliased, the +link_text+ field will contain the
+  # alias, otherwise +link_text+ will contain the entire
+  # contents within the double brackets.
+  #
+  # NOTE: This chunk must be tested before WikiWord since
+  #       a WikiWord can be a substring of a WikiLink.
+  class Link < WikiLink
+    
+    unless defined? WIKI_LINK
+      WIKI_LINK = /(":)?\[\[\s*([^\]\s][^\]]+?)\s*\]\]/
+      LINK_TYPE_SEPARATION = Regexp.new('^(.+):((file)|(pic))$', 0, 'utf-8')
+      ALIAS_SEPARATION = Regexp.new('^(.+)\|(.+)$', 0, 'utf-8')
+    end    
+        
+    def self.pattern() WIKI_LINK end
+
+    def initialize(match_data, content)
+      super
+      @textile_link_suffix, @page_name = match_data[1..2]
+      @link_text = @page_name
+      separate_link_type
+      separate_alias
+      @unmask_text = @content.page_link(@page_name, @link_text, @link_type)
+    end
+
+    private
+
+    # if link wihin the brackets has a form of [[filename:file]] or [[filename:pic]], 
+    # this means a link to a picture or a file
+    def separate_link_type
+      link_type_match = LINK_TYPE_SEPARATION.match(@page_name)
+      if link_type_match
+        @link_text = @page_name = link_type_match[1]
+        @link_type = link_type_match[2..3].compact[0].to_sym
+      end
+    end
+
+    # link text may be different from page name. this will look like [[actual page|link text]]
+    def separate_alias
+      alias_match = ALIAS_SEPARATION.match(@page_name)
+      if alias_match
+        @page_name, @link_text = alias_match[1..2]
+      end
+      # note that [[filename|link text:file]] is also supported
+    end  
+  
+  end
+  
+end