diff --git a/lib/chunks/category.rb b/lib/chunks/category.rb new file mode 100644 index 00000000..d08d8636 --- /dev/null +++ b/lib/chunks/category.rb @@ -0,0 +1,33 @@ +require 'chunks/chunk' + +# The category chunk looks for "category: news" on a line by +# itself and parses the terms after the ':' as categories. +# Other classes can search for Category chunks within +# rendered content to find out what categories this page +# should be in. +# +# Category lines can be hidden using ':category: news', for example +class Category < Chunk::Abstract + CATEGORY_PATTERN = /^(:)?category\s*:(.*)$/i + def self.pattern() CATEGORY_PATTERN end + + attr_reader :hidden, :list + +def initialize(match_data, content) + super(match_data, content) + @hidden = match_data[1] + @list = match_data[2].split(',').map { |c| c.strip } + @unmask_text = '' + if @hidden + @unmask_text = '' + else + category_urls = @list.map { |category| url(category) }.join(', ') + @unmask_text = '
and blocks
+# and within HTML tags.
+module Literal
+
+  # Base class for literal chunks: whatever text was matched is put back
+  # unchanged when the chunk is unmasked.
+  class AbstractLiteral < Chunk::Abstract
+
+    def initialize(match_data, content)
+      super
+      # NOTE(review): @text is not assigned anywhere in this file; presumably
+      # Chunk::Abstract sets it to the matched text -- confirm in chunks/chunk.rb.
+      @unmask_text = @text
+    end
+
+  end
+
+  # A literal chunk that protects complete 'a', 'pre' and 'code' elements
+  # (opening tag, body and the matching closing tag) from wiki rendering.
+  class Pre < AbstractLiteral
+    PRE_BLOCKS = "a|pre|code"
+    # \1 refers back to the tag name captured in the opening tag, so an element
+    # is only closed by its own closing tag. The literal '</' is required:
+    # without it, plain text such as "<a>foo a>" would count as a whole element.
+    # MULTILINE lets '.' cross line breaks inside the element body.
+    PRE_PATTERN = Regexp.new('<('+PRE_BLOCKS+')\b[^>]*?>.*?</\1>', Regexp::MULTILINE)
+    def self.pattern() PRE_PATTERN end
+  end
+
+  # A literal chunk that protects HTML tags from wiki rendering.
+  # The tag name must follow '<' directly, so closing tags such as '</a>'
+  # are not matched by this pattern.
+  class Tags < AbstractLiteral
+    TAGS = "a|img|em|strong|div|span|table|td|th|ul|ol|li|dl|dt|dd"
+    TAGS_PATTERN = Regexp.new('<(?:'+TAGS+')[^>]*?>', Regexp::MULTILINE)
+    def self.pattern() TAGS_PATTERN end
+  end
+end
diff --git a/lib/chunks/nowiki.rb b/lib/chunks/nowiki.rb
new file mode 100644
index 00000000..ef99ec0b
--- /dev/null
+++ b/lib/chunks/nowiki.rb
@@ -0,0 +1,28 @@
+require 'chunks/chunk'
+
+# This chunk allows certain parts of a wiki page to be hidden from the
+# rest of the rendering pipeline. It should be run at the beginning
+# of the pipeline in `wiki_content.rb`.
+#
+# An example use of this chunk is to keep double brackets or
+# auto URI links from being marked up:
+#   <nowiki>Here are [[double brackets]] and a URI: www.uri.org</nowiki>
+#
+# The contents of the chunk will not be processed by any other chunk,
+# so the `www.uri.org` and the double brackets will appear verbatim.
+#
+# Author: Mark Reid
+# Created: 8th June 2004
+class NoWiki < Chunk::Abstract
+
+  # Group 1 is the text between the opening and closing nowiki tags.
+  # The lazy '.*?' stops at the first closing tag; MULTILINE lets '.'
+  # cross line breaks so a protected span may cover several lines.
+  NOWIKI_PATTERN = Regexp.new('<nowiki>(.*?)</nowiki>', Regexp::MULTILINE)
+  def self.pattern() NOWIKI_PATTERN end
+
+  # The protected text, without the surrounding nowiki tags.
+  attr_reader :plain_text
+
+  # match_data:: result of matching NOWIKI_PATTERN
+  # content::    passed through to the superclass constructor
+  def initialize(match_data, content)
+    super
+    # Unmasking restores only the inner text, so the tags themselves
+    # never reach the rendered page.
+    @plain_text = @unmask_text = match_data[1]
+  end
+
+end
diff --git a/lib/chunks/test.rb b/lib/chunks/test.rb
new file mode 100644
index 00000000..edf77d14
--- /dev/null
+++ b/lib/chunks/test.rb
@@ -0,0 +1,18 @@
+require 'test/unit'
+
+class ChunkTest < Test::Unit::TestCase
+
+  # Asserts that the pattern of +type+ matches +test_text+, builds a chunk
+  # from that match and checks every reader listed in +expected+.
+  #
+  # type::      chunk class; must respond to +pattern+
+  # test_text:: text the pattern is matched against
+  # expected::  Hash mapping a method symbol to its expected return value
+  # content::   second constructor argument for the chunk. The chunk classes
+  #             in lib/chunks take (match_data, content), so building one
+  #             from the match data alone raises ArgumentError. Defaults to
+  #             +test_text+; pass a richer stub for chunks that call methods
+  #             on their content (e.g. page_link).
+  def match(type, test_text, expected, content = test_text)
+    pattern = type.pattern
+    assert_match(pattern, test_text)
+    match_data = pattern.match(test_text) # Previous assertion guarantees match
+    chunk = type.new(match_data, content)
+
+    # Test if requested parts are correct.
+    expected.each do |method_sym, value|
+      assert_respond_to(chunk, method_sym)
+      assert_equal(value, chunk.send(method_sym), "Checking value of '#{method_sym}'")
+    end
+  end
+end
diff --git a/lib/chunks/uri.rb b/lib/chunks/uri.rb
new file mode 100644
index 00000000..1a208535
--- /dev/null
+++ b/lib/chunks/uri.rb
@@ -0,0 +1,182 @@
+require 'chunks/chunk'
+
+# This wiki chunk matches arbitrary URIs, using patterns from the Ruby URI modules.
+# It parses out a variety of fields that could be used by renderers to format
+# the links in various ways (shortening domain names, hiding email addresses)
+# It matches email addresses and host.com.au domains without schemes (http://)
+# but adds these on as required.
+#
+# The heuristic used to match a URI is designed to err on the side of caution.
+# That is, it is more likely to not autolink a URI than it is to accidentally
+# autolink something that is not a URI. The reason behind this is that it is
+# easier to force a URI link by prefixing 'http://' to it than it is to escape
+# an incorrectly marked up non-URI.
+#
+# I'm using a part of the [ISO 3166-1 Standard][iso3166] for country name suffixes.
+# The generic names are from www.bnoack.com/data/countrycode2.html
+# [iso3166]: http://geotags.com/iso3166/
+
+# Chunk that recognises internet URIs (and bare e-mail addresses / host names)
+# in wiki text and replaces them with an HTML link when unmasked.
+class URIChunk < Chunk::Abstract
+  include URI::REGEXP::PATTERN
+
+  # this condition is to get rid of pesky warnings in tests
+  unless defined? URIChunk::INTERNET_URI_REGEXP
+
+    GENERIC = 'aero|biz|com|coop|edu|gov|info|int|mil|museum|name|net|org'
+
+    COUNTRY = 'ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|az|ba|bb|bd|be|' +
+      'bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cf|cd|cg|ch|ci|ck|cl|' +
+      'cm|cn|co|cr|cs|cu|cv|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|fi|' +
+      'fj|fk|fm|fo|fr|fx|ga|gb|gd|ge|gf|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|' +
+      'hk|hm|hn|hr|ht|hu|id|ie|il|in|io|iq|ir|is|it|jm|jo|jp|ke|kg|kh|ki|km|kn|' +
+      'kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|mg|mh|mk|ml|mm|' +
+      'mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nt|' +
+      'nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|pt|pw|py|qa|re|ro|ru|rw|sa|sb|sc|' +
+      'sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|' +
+      'tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|' +
+      'ws|ye|yt|yu|za|zm|zr|zw'
+    # These are needed otherwise HOST will match almost anything
+    TLDS = "(?:#{GENERIC}|#{COUNTRY})"
+
+    # Redefine USERINFO so that it must have non-zero length
+    USERINFO = "(?:[#{UNRESERVED};:&=+$,]|#{ESCAPED})+"
+
+    # unreserved_no_ending = alphanum | mark, but URI_ENDING [)!] excluded
+    UNRESERVED_NO_ENDING = "-_.~*'(#{ALNUM}"
+
+    # this ensures that query or fragment do not end with URI_ENDING
+    # and enable us to use a much simpler self.pattern Regexp
+
+    # uric_no_ending = reserved | unreserved_no_ending | escaped
+    URIC_NO_ENDING = "(?:[#{UNRESERVED_NO_ENDING}#{RESERVED}]|#{ESCAPED})"
+    # query = *uric
+    QUERY = "#{URIC_NO_ENDING}*"
+    # fragment = *uric
+    FRAGMENT = "#{URIC_NO_ENDING}*"
+
+    # DOMLABEL is defined in the ruby uri library, TLDS is defined above
+    INTERNET_HOSTNAME = "(?:#{DOMLABEL}\\.)+#{TLDS}"
+
+    # Correct a typo bug in ruby 1.8.x lib/uri/common.rb
+    PORT = '\\d*'
+
+    # The group numbers below are relative to INTERNET_URI itself. In the
+    # compiled regexp SUSPICIOUS_PRECEDING_CHARACTER contributes group 1, so
+    # every group listed here is shifted by one (scheme is match_data[2], ...).
+    INTERNET_URI =
+      "(?:(#{SCHEME}):/{0,2})?" + # Optional scheme: (\1)
+      "(?:(#{USERINFO})@)?" + # Optional userinfo@ (\2)
+      "(#{INTERNET_HOSTNAME})" + # Mandatory hostname (\3)
+      "(?::(#{PORT}))?" + # Optional :port (\4)
+      "(#{ABS_PATH})?" + # Optional absolute path (\5)
+      "(?:\\?(#{QUERY}))?" + # Optional ?query (\6)
+      "(?:\\#(#{FRAGMENT}))?" + # Optional #fragment (\7)
+      '(?=\.?(?:\s|\)|\z))' # ends only with optional dot + space or ")"
+                            # or end of the string
+
+    SUSPICIOUS_PRECEDING_CHARACTER = '(!|\"\:|\"|\\\'|\]\()?' # any of !, ":, ", ', ](
+
+    INTERNET_URI_REGEXP =
+      Regexp.new(SUSPICIOUS_PRECEDING_CHARACTER + INTERNET_URI, Regexp::EXTENDED, 'N')
+
+  end
+
+  def URIChunk.pattern
+    INTERNET_URI_REGEXP
+  end
+
+  attr_reader :user, :host, :port, :path, :query, :fragment, :link_text
+
+  # Replaces every URI found in +content+ with the mask of a newly registered
+  # chunk. Matches preceded by a suspicious character are left untouched and
+  # no chunk is registered for them.
+  #
+  # content:: must respond to +gsub!+ and +add_chunk+
+  def self.apply_to(content)
+    content.gsub!(pattern) do |matched_text|
+      chunk = new($~, content)
+      if chunk.avoid_autolinking?
+        # do not substitute nor register the chunk
+        matched_text
+      else
+        content.add_chunk(chunk)
+        chunk.mask
+      end
+    end
+  end
+
+  # match_data:: result of matching the class pattern; group 1 is the
+  #              suspicious preceding character, groups 2..8 are scheme,
+  #              userinfo, host, port, path, query and fragment
+  # content::    passed through to the superclass constructor
+  def initialize(match_data, content)
+    super
+    @link_text = match_data[0]
+    @suspicious_preceding_character = match_data[1]
+    @original_scheme, @user, @host, @port, @path, @query, @fragment = match_data[2..-1]
+    treat_trailing_character
+    # The whole match (including any trailing ')' or '!') is replaced by the
+    # chunk mask, so the punctuation that was split off the link has to be
+    # emitted again after the closing tag or it would vanish from the page.
+    @unmask_text = "<a href=\"#{uri}\">#{link_text}</a>#{@trailing_punctuation}"
+  end
+
+  # True when the match started with one of the characters listed in
+  # SUSPICIOUS_PRECEDING_CHARACTER; such matches must not be autolinked.
+  def avoid_autolinking?
+    !@suspicious_preceding_character.nil?
+  end
+
+  # If the last character matched by URI pattern is ! or ), this may be part
+  # of the markup, not a URL. We should handle it as such. It is possible to
+  # do it by a regexp, but much easier to do programmatically.
+  #
+  # The character is removed from +link_text+ and from the last URI component
+  # that matched, and remembered in @trailing_punctuation (nil otherwise).
+  def treat_trailing_character
+    last_char = @link_text[-1..-1]
+    if last_char == ')' || last_char == '!'
+      @trailing_punctuation = last_char
+      @link_text.chop!
+      # the last non-nil component is the one that swallowed the character
+      [@original_scheme, @user, @host, @port, @path, @query, @fragment].compact.last.chop!
+    else
+      @trailing_punctuation = nil
+    end
+  end
+
+  # Scheme written in the text, or a default: 'mailto' when userinfo was
+  # matched (user@host), 'http' otherwise.
+  def scheme
+    @original_scheme || (@user ? 'mailto' : 'http')
+  end
+
+  def scheme_delimiter
+    scheme == 'mailto' ? ':' : '://'
+  end
+
+  def user_delimiter
+    '@' unless @user.nil?
+  end
+
+  def port_delimiter
+    ':' unless @port.nil?
+  end
+
+  def query_delimiter
+    '?' unless @query.nil?
+  end
+
+  def fragment_delimiter
+    '#' unless @fragment.nil?
+  end
+
+  # Reassembles the full URI from its parts, adding the default scheme when
+  # none was written. Absent parts and their delimiters are nil and dropped
+  # by +compact+. The fragment is included so that links to anchors keep
+  # their target.
+  def uri
+    [scheme, scheme_delimiter, user, user_delimiter, host, port_delimiter, port, path,
+     query_delimiter, query, fragment_delimiter, fragment].compact.join
+  end
+
+end
+
+# URI chunk with a mandatory scheme but a less restrictive hostname, like
+# http://localhost:2500/blah.html
+class LocalURIChunk < URIChunk
+
+  unless defined? LocalURIChunk::LOCAL_URI_REGEXP
+    # hostname can be just a simple word like 'localhost'
+    ANY_HOSTNAME = "(?:#{DOMLABEL}\\.)*#{TOPLABEL}\\.?"
+
+    # The basic URI expression as a string, assembled piece by piece.
+    # Scheme and hostname are mandatory.
+    LOCAL_URI = [
+      "(?:(#{SCHEME})://)+",   # Mandatory scheme:// (\1)
+      "(?:(#{USERINFO})@)?",   # Optional userinfo@ (\2)
+      "(#{ANY_HOSTNAME})",     # Mandatory hostname (\3)
+      "(?::(#{PORT}))?",       # Optional :port (\4)
+      "(#{ABS_PATH})?",        # Optional absolute path (\5)
+      "(?:\\?(#{QUERY}))?",    # Optional ?query (\6)
+      "(?:\\#(#{FRAGMENT}))?", # Optional #fragment (\7)
+      '(?=\.?(?:\s|\)|\z))'    # ends only with optional dot + space or ")"
+                               # or end of the string
+    ].join
+
+    LOCAL_URI_REGEXP =
+      Regexp.new("#{SUSPICIOUS_PRECEDING_CHARACTER}#{LOCAL_URI}", Regexp::EXTENDED, 'N')
+  end
+
+  # Pattern used instead of the stricter one inherited from URIChunk.
+  def self.pattern
+    LOCAL_URI_REGEXP
+  end
+
+end
diff --git a/lib/chunks/wiki.rb b/lib/chunks/wiki.rb
new file mode 100644
index 00000000..840f644a
--- /dev/null
+++ b/lib/chunks/wiki.rb
@@ -0,0 +1,141 @@
+require 'wiki_words'
+require 'chunks/chunk'
+require 'chunks/wiki'
+require 'cgi'
+
+# Contains all the methods for finding and replacing wiki related links.
+module WikiChunk
+ include Chunk
+
+ # Common base class for every chunk that refers to another wiki page.
+ class WikiReference < Chunk::Abstract
+
+   # Name of the page this chunk refers to
+   attr_reader :page_name
+
+   # Looks the referenced page up by name among the pages of the content's web.
+   # NOTE(review): @content is not assigned in this file; presumably it is set
+   # by Chunk::Abstract -- confirm in chunks/chunk.rb.
+   def refpage
+     pages = @content.web.pages
+     pages[@page_name]
+   end
+
+ end
+
+ # A wiki link is the top-level class for links that refer to
+ # another wiki page. Subclasses supply +pattern+ and set
+ # @textile_link_suffix, @page_name and @link_text.
+ class WikiLink < WikiReference
+
+   attr_reader :link_text, :link_type
+
+   def initialize(match_data, content)
+     super
+     # default link type; Link may replace it with :file or :pic
+     @link_type = :show
+   end
+
+   # Replaces every match of the class pattern in +content+ with the mask of
+   # a newly registered chunk. Matches that belong to a textile URL are left
+   # untouched and no chunk is registered for them.
+   #
+   # content:: must respond to +gsub!+ and +add_chunk+
+   def self.apply_to(content)
+     content.gsub!(pattern) do |matched_text|
+       chunk = new($~, content)
+       if chunk.textile_url?
+         # do not substitute
+         matched_text
+       else
+         content.add_chunk(chunk)
+         chunk.mask
+       end
+     end
+   end
+
+   # refpage is inherited from WikiReference; the copy that used to live
+   # here was identical to the inherited method.
+
+   # True when the subclass captured a textile link suffix (the '":' that
+   # may precede a match), i.e. the text is the target of a textile link.
+   def textile_url?
+     !@textile_link_suffix.nil?
+   end
+
+ end
+
+ # Chunk for a plain WikiWord. Prepending a '\' escapes the word; in
+ # that case +escaped_text+ returns the WikiWord (it is +nil+ for a
+ # normal, unescaped word). +page_name+ always returns the matched
+ # WikiWord.
+ class Word < WikiLink
+
+   attr_reader :escaped_text
+
+   unless defined? WIKI_WORD
+     WIKI_WORD = Regexp.new('(":)?(\\\\)?(' + WikiWords::WIKI_WORD_PATTERN + ')\b', 0, "utf-8")
+   end
+
+   def self.pattern
+     WIKI_WORD
+   end
+
+   # match_data:: result of matching WIKI_WORD; groups 1..3 are the textile
+   #              link suffix, the escaping backslash and the WikiWord
+   # content::    passed to the superclass; used here to build the page link
+   def initialize(match_data, content)
+     super
+     @textile_link_suffix = match_data[1]
+     @escape = match_data[2]
+     @page_name = match_data[3]
+
+     @unmask_mode = :escape if @escape
+     @escaped_text = @escape ? @page_name : nil
+
+     @link_text = WikiWords.separate(@page_name)
+     # an escaped word is shown as-is; only unescaped words become links
+     @unmask_text =
+       if @escaped_text
+         @escaped_text
+       else
+         @content.page_link(@page_name, @link_text, @link_type)
+       end
+   end
+
+ end
+
+ # This chunk handles [[bracketed wiki words]] and
+ # [[AliasedWords|aliased wiki words]]. The first part of an
+ # aliased wiki word must be a WikiWord. If the WikiWord
+ # is aliased, the +link_text+ field will contain the
+ # alias, otherwise +link_text+ will contain the entire
+ # contents within the double brackets.
+ #
+ # NOTE: This chunk must be tested before WikiWord since
+ # a WikiWord can be a substring of a WikiLink.
+ class Link < WikiLink
+
+   unless defined? WIKI_LINK
+     WIKI_LINK = /(":)?\[\[\s*([^\]\s][^\]]+?)\s*\]\]/
+     LINK_TYPE_SEPARATION = Regexp.new('^(.+):((file)|(pic))$', 0, 'utf-8')
+     ALIAS_SEPARATION = Regexp.new('^(.+)\|(.+)$', 0, 'utf-8')
+   end
+
+   def self.pattern
+     WIKI_LINK
+   end
+
+   # match_data:: result of matching WIKI_LINK; group 1 is the textile link
+   #              suffix, group 2 the text between the double brackets
+   # content::    passed to the superclass; used here to build the page link
+   def initialize(match_data, content)
+     super
+     @textile_link_suffix = match_data[1]
+     @page_name = match_data[2]
+     @link_text = @page_name
+     # the type suffix has to be stripped first so that
+     # [[filename|link text:file]] is split correctly afterwards
+     separate_link_type
+     separate_alias
+     @unmask_text = @content.page_link(@page_name, @link_text, @link_type)
+   end
+
+   private
+
+   # If the link within the brackets has the form [[filename:file]] or
+   # [[filename:pic]], it is a link to a file or a picture: the suffix is
+   # removed from the name and becomes the link type.
+   def separate_link_type
+     type_match = LINK_TYPE_SEPARATION.match(@page_name)
+     return unless type_match
+
+     @link_text = @page_name = type_match[1]
+     # group 2 wraps both alternatives, so it always holds 'file' or 'pic'
+     @link_type = type_match[2].to_sym
+   end
+
+   # Link text may be different from page name. This will look like
+   # [[actual page|link text]]. Note that [[filename|link text:file]]
+   # is also supported.
+   def separate_alias
+     alias_match = ALIAS_SEPARATION.match(@page_name)
+     return unless alias_match
+
+     @page_name = alias_match[1]
+     @link_text = alias_match[2]
+   end
+
+ end
+
+end