added chunks

This commit is contained in:
Rick Okin 2005-08-09 02:07:39 +00:00
parent 8c331d1019
commit c4b7b2d9f2
9 changed files with 621 additions and 0 deletions

33
lib/chunks/category.rb Normal file
View file

@ -0,0 +1,33 @@
require 'chunks/chunk'
# The category chunk looks for "category: news" on a line by
# itself and parses the terms after the ':' as categories.
# Other classes can search for Category chunks within
# rendered content to find out what categories this page
# should be in.
#
# Category lines can be hidden using ':category: news', for example
class Category < Chunk::Abstract
CATEGORY_PATTERN = /^(:)?category\s*:(.*)$/i
def self.pattern() CATEGORY_PATTERN end
attr_reader :hidden, :list
def initialize(match_data, content)
super(match_data, content)
@hidden = match_data[1]
@list = match_data[2].split(',').map { |c| c.strip }
@unmask_text = ''
if @hidden
@unmask_text = ''
else
category_urls = @list.map { |category| url(category) }.join(', ')
@unmask_text = '<div class="property"> category: ' + category_urls + '</div>'
end
end
# TODO move presentation of page metadata to controller/view
def url(category)
%{<a class="category_link" href="../list/?category=#{category}">#{category}</a>}
end
end

86
lib/chunks/chunk.rb Normal file
View file

@ -0,0 +1,86 @@
require 'uri/common'
# A chunk is a pattern of text that can be protected
# and interrogated by a renderer. Each Chunk class has a
# +pattern+ that states what sort of text it matches.
# Chunks are initalized by passing in the result of a
# match by its pattern.
module Chunk
class Abstract
# automatically construct the array of derivatives of Chunk::Abstract
@derivatives = []
class << self
attr_reader :derivatives
end
def self::inherited( klass )
Abstract::derivatives << klass
end
# the class name part of the mask strings
def self.mask_string
self.to_s.delete(':').downcase
end
# a regexp that matches all chunk_types masks
def Abstract::mask_re(chunk_types)
tmp = chunk_types.map{|klass| klass.mask_string}.join("|")
Regexp.new("chunk([0-9a-f]+n\\d+)(#{tmp})chunk")
end
attr_reader :text, :unmask_text, :unmask_mode
def initialize(match_data, content)
@text = match_data[0]
@content = content
@unmask_mode = :normal
end
# Find all the chunks of the given type in content
# Each time the pattern is matched, create a new
# chunk for it, and replace the occurance of the chunk
# in this content with its mask.
def self.apply_to(content)
content.gsub!( self.pattern ) do |match|
new_chunk = self.new($~, content)
content.add_chunk(new_chunk)
new_chunk.mask
end
end
# should contain only [a-z0-9]
def mask
@mask ||="chunk#{@id}#{self.class.mask_string}chunk"
end
# We should not use object_id because object_id is not guarantied
# to be unique when we restart the wiki (new object ids can equal old ones
# that were restored from madeleine storage)
def id
@id ||= "#{@content.page_id}n#{@content.chunk_id}"
end
def unmask
@content.sub!(mask, @unmask_text)
end
def rendered?
@unmask_mode == :normal
end
def escaped?
@unmask_mode == :escape
end
def revert
@content.sub!(mask, @text)
# unregister
@content.delete_chunk(self)
end
end
end

61
lib/chunks/engines.rb Normal file
View file

@ -0,0 +1,61 @@
$: << File.dirname(__FILE__) + "../../lib"
require 'redcloth'
require 'bluecloth_tweaked'
require 'rdocsupport'
require 'chunks/chunk'
# The markup engines are Chunks that call the one of RedCloth
# or RDoc to convert text. This markup occurs when the chunk is required
# to mask itself.
module Engines
class AbstractEngine < Chunk::Abstract
# Create a new chunk for the whole content and replace it with its mask.
def self.apply_to(content)
new_chunk = self.new(content)
content.replace(new_chunk.mask)
end
private
# Never create engines by constructor - use apply_to instead
def initialize(content)
@content = content
end
end
class Textile < AbstractEngine
def mask
redcloth = RedCloth.new(@content, [:hard_breaks] + @content.options[:engine_opts])
redcloth.filter_html = false
redcloth.no_span_caps = false
redcloth.to_html(:textile)
end
end
class Markdown < AbstractEngine
def mask
BlueCloth.new(@content, @content.options[:engine_opts]).to_html
end
end
class Mixed < AbstractEngine
def mask
redcloth = RedCloth.new(@content, @content.options[:engine_opts])
redcloth.filter_html = false
redcloth.no_span_caps = false
redcloth.to_html
end
end
class RDoc < AbstractEngine
def mask
RDocSupport::RDocFormatter.new(@content).to_html
end
end
MAP = { :textile => Textile, :markdown => Markdown, :mixed => Mixed, :rdoc => RDoc }
MAP.default = Textile
end

41
lib/chunks/include.rb Normal file
View file

@ -0,0 +1,41 @@
require 'chunks/wiki'
# Includes the contents of another page for rendering.
# The include command looks like this: "[[!include PageName]]".
# It is a WikiReference since it refers to another page (PageName)
# and the wiki content using this command must be notified
# of changes to that page.
# If the included page could not be found, a warning is displayed.
class Include < WikiChunk::WikiReference
INCLUDE_PATTERN = /\[\[!include\s+(.*?)\]\]\s*/i
def self.pattern() INCLUDE_PATTERN end
def initialize(match_data, content)
super
@page_name = match_data[1].strip
@unmask_text = get_unmask_text_avoiding_recursion_loops
end
private
def get_unmask_text_avoiding_recursion_loops
if refpage then
refpage.clear_display_cache
if refpage.wiki_includes.include?(@content.page_name)
# this will break the recursion
@content.delete_chunk(self)
return "<em>Recursive include detected; #{@page_name} --> #{@content.page_name} " +
"--> #{@page_name}</em>\n"
else
@content.merge_chunks(refpage.display_content)
return refpage.display_content.pre_rendered
end
else
return "<em>Could not include #{@page_name}</em>\n"
end
end
end

31
lib/chunks/literal.rb Normal file
View file

@ -0,0 +1,31 @@
require 'chunks/chunk'
# These are basic chunks that have a pattern and can be protected.
# They are used by rendering process to prevent wiki rendering
# occuring within literal areas such as <code> and <pre> blocks
# and within HTML tags.
module Literal
class AbstractLiteral < Chunk::Abstract
def initialize(match_data, content)
super
@unmask_text = @text
end
end
# A literal chunk that protects 'code' and 'pre' tags from wiki rendering.
class Pre < AbstractLiteral
PRE_BLOCKS = "a|pre|code"
PRE_PATTERN = Regexp.new('<('+PRE_BLOCKS+')\b[^>]*?>.*?</\1>', Regexp::MULTILINE)
def self.pattern() PRE_PATTERN end
end
# A literal chunk that protects HTML tags from wiki rendering.
class Tags < AbstractLiteral
TAGS = "a|img|em|strong|div|span|table|td|th|ul|ol|li|dl|dt|dd"
TAGS_PATTERN = Regexp.new('<(?:'+TAGS+')[^>]*?>', Regexp::MULTILINE)
def self.pattern() TAGS_PATTERN end
end
end

28
lib/chunks/nowiki.rb Normal file
View file

@ -0,0 +1,28 @@
require 'chunks/chunk'
# This chunks allows certain parts of a wiki page to be hidden from the
# rest of the rendering pipeline. It should be run at the beginning
# of the pipeline in `wiki_content.rb`.
#
# An example use of this chunk is to markup double brackets or
# auto URI links:
# <nowiki>Here are [[double brackets]] and a URI: www.uri.org</nowiki>
#
# The contents of the chunks will not be processed by any other chunk
# so the `www.uri.org` and the double brackets will appear verbatim.
#
# Author: Mark Reid <mark at threewordslong dot com>
# Created: 8th June 2004
class NoWiki < Chunk::Abstract
NOWIKI_PATTERN = Regexp.new('<nowiki>(.*?)</nowiki>', Regexp::MULTILINE)
def self.pattern() NOWIKI_PATTERN end
attr_reader :plain_text
def initialize(match_data, content)
super
@plain_text = @unmask_text = match_data[1]
end
end

18
lib/chunks/test.rb Normal file
View file

@ -0,0 +1,18 @@
require 'test/unit'
class ChunkTest < Test::Unit::TestCase
# Asserts a number of tests for the given type and text.
def match(type, test_text, expected)
pattern = type.pattern
assert_match(pattern, test_text)
pattern =~ test_text # Previous assertion guarantees match
chunk = type.new($~)
# Test if requested parts are correct.
for method_sym, value in expected do
assert_respond_to(chunk, method_sym)
assert_equal(value, chunk.method(method_sym).call, "Checking value of '#{method_sym}'")
end
end
end

182
lib/chunks/uri.rb Normal file
View file

@ -0,0 +1,182 @@
require 'chunks/chunk'
# This wiki chunk matches arbitrary URIs, using patterns from the Ruby URI modules.
# It parses out a variety of fields that could be used by renderers to format
# the links in various ways (shortening domain names, hiding email addresses)
# It matches email addresses and host.com.au domains without schemes (http://)
# but adds these on as required.
#
# The heuristic used to match a URI is designed to err on the side of caution.
# That is, it is more likely to not autolink a URI than it is to accidently
# autolink something that is not a URI. The reason behind this is it is easier
# to force a URI link by prefixing 'http://' to it than it is to escape and
# incorrectly marked up non-URI.
#
# I'm using a part of the [ISO 3166-1 Standard][iso3166] for country name suffixes.
# The generic names are from www.bnoack.com/data/countrycode2.html)
# [iso3166]: http://geotags.com/iso3166/
class URIChunk < Chunk::Abstract
include URI::REGEXP::PATTERN
# this condition is to get rid of pesky warnings in tests
unless defined? URIChunk::INTERNET_URI_REGEXP
GENERIC = 'aero|biz|com|coop|edu|gov|info|int|mil|museum|name|net|org'
COUNTRY = 'ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|az|ba|bb|bd|be|' +
'bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cf|cd|cg|ch|ci|ck|cl|' +
'cm|cn|co|cr|cs|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|fi|' +
'fj|fk|fm|fo|fr|fx|ga|gb|gd|ge|gf|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|' +
'hk|hm|hn|hr|ht|hu|id|ie|il|in|io|iq|ir|is|it|jm|jo|jp|ke|kg|kh|ki|km|kn|' +
'kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|mg|mh|mk|ml|mm|' +
'mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nt|' +
'nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|pt|pw|py|qa|re|ro|ru|rw|sa|sb|sc|' +
'sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|' +
'tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|' +
'ws|ye|yt|yu|za|zm|zr|zw'
# These are needed otherwise HOST will match almost anything
TLDS = "(?:#{GENERIC}|#{COUNTRY})"
# Redefine USERINFO so that it must have non-zero length
USERINFO = "(?:[#{UNRESERVED};:&=+$,]|#{ESCAPED})+"
# unreserved_no_ending = alphanum | mark, but URI_ENDING [)!] excluded
UNRESERVED_NO_ENDING = "-_.~*'(#{ALNUM}"
# this ensures that query or fragment do not end with URI_ENDING
# and enable us to use a much simpler self.pattern Regexp
# uric_no_ending = reserved | unreserved_no_ending | escaped
URIC_NO_ENDING = "(?:[#{UNRESERVED_NO_ENDING}#{RESERVED}]|#{ESCAPED})"
# query = *uric
QUERY = "#{URIC_NO_ENDING}*"
# fragment = *uric
FRAGMENT = "#{URIC_NO_ENDING}*"
# DOMLABEL is defined in the ruby uri library, TLDS is defined above
INTERNET_HOSTNAME = "(?:#{DOMLABEL}\\.)+#{TLDS}"
# Correct a typo bug in ruby 1.8.x lib/uri/common.rb
PORT = '\\d*'
INTERNET_URI =
"(?:(#{SCHEME}):/{0,2})?" + # Optional scheme: (\1)
"(?:(#{USERINFO})@)?" + # Optional userinfo@ (\2)
"(#{INTERNET_HOSTNAME})" + # Mandatory hostname (\3)
"(?::(#{PORT}))?" + # Optional :port (\4)
"(#{ABS_PATH})?" + # Optional absolute path (\5)
"(?:\\?(#{QUERY}))?" + # Optional ?query (\6)
"(?:\\#(#{FRAGMENT}))?" + # Optional #fragment (\7)
'(?=\.?(?:\s|\)|\z))' # ends only with optional dot + space or ")"
# or end of the string
SUSPICIOUS_PRECEDING_CHARACTER = '(!|\"\:|\"|\\\'|\]\()?' # any of !, ":, ", ', ](
INTERNET_URI_REGEXP =
Regexp.new(SUSPICIOUS_PRECEDING_CHARACTER + INTERNET_URI, Regexp::EXTENDED, 'N')
end
def URIChunk.pattern
INTERNET_URI_REGEXP
end
attr_reader :user, :host, :port, :path, :query, :fragment, :link_text
def self.apply_to(content)
content.gsub!( self.pattern ) do |matched_text|
chunk = self.new($~, content)
if chunk.avoid_autolinking?
# do not substitute nor register the chunk
matched_text
else
content.add_chunk(chunk)
chunk.mask
end
end
end
def initialize(match_data, content)
super
@link_text = match_data[0]
@suspicious_preceding_character = match_data[1]
@original_scheme, @user, @host, @port, @path, @query, @fragment = match_data[2..-1]
treat_trailing_character
@unmask_text = "<a href=\"#{uri}\">#{link_text}</a>"
end
def avoid_autolinking?
not @suspicious_preceding_character.nil?
end
def treat_trailing_character
# If the last character matched by URI pattern is in ! or ), this may be part of the markup,
# not a URL. We should handle it as such. It is possible to do it by a regexp, but
# much easier to do programmatically
last_char = @link_text[-1..-1]
if last_char == ')' or last_char == '!'
@trailing_punctuation = last_char
@link_text.chop!
[@original_scheme, @user, @host, @port, @path, @query, @fragment].compact.last.chop!
else
@trailing_punctuation = nil
end
end
def scheme
@original_scheme or (@user ? 'mailto' : 'http')
end
def scheme_delimiter
scheme == 'mailto' ? ':' : '://'
end
def user_delimiter
'@' unless @user.nil?
end
def port_delimiter
':' unless @port.nil?
end
def query_delimiter
'?' unless @query.nil?
end
def uri
[scheme, scheme_delimiter, user, user_delimiter, host, port_delimiter, port, path,
query_delimiter, query].compact.join
end
end
# uri with mandatory scheme but less restrictive hostname, like
# http://localhost:2500/blah.html
class LocalURIChunk < URIChunk
unless defined? LocalURIChunk::LOCAL_URI_REGEXP
# hostname can be just a simple word like 'localhost'
ANY_HOSTNAME = "(?:#{DOMLABEL}\\.)*#{TOPLABEL}\\.?"
# The basic URI expression as a string
# Scheme and hostname are mandatory
LOCAL_URI =
"(?:(#{SCHEME})://)+" + # Mandatory scheme:// (\1)
"(?:(#{USERINFO})@)?" + # Optional userinfo@ (\2)
"(#{ANY_HOSTNAME})" + # Mandatory hostname (\3)
"(?::(#{PORT}))?" + # Optional :port (\4)
"(#{ABS_PATH})?" + # Optional absolute path (\5)
"(?:\\?(#{QUERY}))?" + # Optional ?query (\6)
"(?:\\#(#{FRAGMENT}))?" + # Optional #fragment (\7)
'(?=\.?(?:\s|\)|\z))' # ends only with optional dot + space or ")"
# or end of the string
LOCAL_URI_REGEXP = Regexp.new(SUSPICIOUS_PRECEDING_CHARACTER + LOCAL_URI, Regexp::EXTENDED, 'N')
end
def LocalURIChunk.pattern
LOCAL_URI_REGEXP
end
end

141
lib/chunks/wiki.rb Normal file
View file

@ -0,0 +1,141 @@
require 'wiki_words'
require 'chunks/chunk'
require 'chunks/wiki'
require 'cgi'
# Contains all the methods for finding and replacing wiki related links.
module WikiChunk
include Chunk
# A wiki reference is the top-level class for anything that refers to
# another wiki page.
class WikiReference < Chunk::Abstract
# Name of the referenced page
attr_reader :page_name
# the referenced page
def refpage
@content.web.pages[@page_name]
end
end
# A wiki link is the top-level class for links that refers to
# another wiki page.
class WikiLink < WikiReference
attr_reader :link_text, :link_type
def initialize(match_data, content)
super
@link_type = :show
end
def self.apply_to(content)
content.gsub!( self.pattern ) do |matched_text|
chunk = self.new($~, content)
if chunk.textile_url?
# do not substitute
matched_text
else
content.add_chunk(chunk)
chunk.mask
end
end
end
# the referenced page
def refpage
@content.web.pages[@page_name]
end
def textile_url?
not @textile_link_suffix.nil?
end
end
# This chunk matches a WikiWord. WikiWords can be escaped
# by prepending a '\'. When this is the case, the +escaped_text+
# method will return the WikiWord instead of the usual +nil+.
# The +page_name+ method returns the matched WikiWord.
class Word < WikiLink
attr_reader :escaped_text
unless defined? WIKI_WORD
WIKI_WORD = Regexp.new('(":)?(\\\\)?(' + WikiWords::WIKI_WORD_PATTERN + ')\b', 0, "utf-8")
end
def self.pattern
WIKI_WORD
end
def initialize(match_data, content)
super
@textile_link_suffix, @escape, @page_name = match_data[1..3]
if @escape
@unmask_mode = :escape
@escaped_text = @page_name
else
@escaped_text = nil
end
@link_text = WikiWords.separate(@page_name)
@unmask_text = (@escaped_text || @content.page_link(@page_name, @link_text, @link_type))
end
end
# This chunk handles [[bracketted wiki words]] and
# [[AliasedWords|aliased wiki words]]. The first part of an
# aliased wiki word must be a WikiWord. If the WikiWord
# is aliased, the +link_text+ field will contain the
# alias, otherwise +link_text+ will contain the entire
# contents within the double brackets.
#
# NOTE: This chunk must be tested before WikiWord since
# a WikiWords can be a substring of a WikiLink.
class Link < WikiLink
unless defined? WIKI_LINK
WIKI_LINK = /(":)?\[\[\s*([^\]\s][^\]]+?)\s*\]\]/
LINK_TYPE_SEPARATION = Regexp.new('^(.+):((file)|(pic))$', 0, 'utf-8')
ALIAS_SEPARATION = Regexp.new('^(.+)\|(.+)$', 0, 'utf-8')
end
def self.pattern() WIKI_LINK end
def initialize(match_data, content)
super
@textile_link_suffix, @page_name = match_data[1..2]
@link_text = @page_name
separate_link_type
separate_alias
@unmask_text = @content.page_link(@page_name, @link_text, @link_type)
end
private
# if link wihin the brackets has a form of [[filename:file]] or [[filename:pic]],
# this means a link to a picture or a file
def separate_link_type
link_type_match = LINK_TYPE_SEPARATION.match(@page_name)
if link_type_match
@link_text = @page_name = link_type_match[1]
@link_type = link_type_match[2..3].compact[0].to_sym
end
end
# link text may be different from page name. this will look like [[actual page|link text]]
def separate_alias
alias_match = ALIAS_SEPARATION.match(@page_name)
if alias_match
@page_name, @link_text = alias_match[1..2]
end
# note that [[filename|link text:file]] is also supported
end
end
end