Moved Maruku (and its dependencies) and XHTMLDiff (and its dependencies) to vendor/plugins/.

Synced with Instiki SVN.
Jacques Distler 2007-02-10 23:03:15 -06:00
parent 64037c67ac
commit 63e217bcfd
59 changed files with 40 additions and 1 deletions

vendor/plugins/syntax/lib/syntax.rb vendored Normal file

@@ -0,0 +1,38 @@
require 'syntax/common'
module Syntax
# A default tokenizer for handling syntaxes that are not explicitly handled
# elsewhere. It simply yields the given text as a single token.
class Default
# Yield the given text as a single token.
def tokenize( text )
yield Token.new( text, :normal )
end
end
# A hash for registering syntax implementations.
SYNTAX = Hash.new( Default )
# Load the implementation of the requested syntax. If the syntax cannot be
# found, or if it cannot be loaded for whatever reason, the Default syntax
# handler will be returned.
def load( syntax )
begin
require "syntax/lang/#{syntax}"
rescue LoadError
end
SYNTAX[ syntax ].new
end
module_function :load
# Return an array of the names of supported syntaxes.
def all
lang_dir = File.join(File.dirname(__FILE__), "syntax", "lang")
Dir["#{lang_dir}/*.rb"].map { |path| File.basename(path, ".rb") }
end
module_function :all
end
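Aside: a minimal usage sketch of this entry point (not part of the commit; it assumes vendor/plugins/syntax/lib is on the load path). Unknown syntax names fall back to the Default tokenizer:

require 'syntax'

tokenizer = Syntax.load( "ruby" )      # an instance of the Ruby tokenizer below
fallback  = Syntax.load( "nonesuch" )  # LoadError is rescued; Default is returned
fallback.tokenize( "plain text" ) do |token|
  puts "#{token.group}: #{token}"      # => "normal: plain text"
end
puts Syntax.all.inspect                # names of the bundled lang/*.rb files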

vendor/plugins/syntax/lib/syntax/common.rb vendored Normal file

@@ -0,0 +1,163 @@
require 'strscan'
module Syntax
# A single token extracted by a tokenizer. It is simply the lexeme
# itself, decorated with a 'group' attribute to identify the type of the
# lexeme.
class Token < String
# the type of the lexeme that was extracted.
attr_reader :group
# the instruction associated with this token (:none, :region_open, or
# :region_close)
attr_reader :instruction
# Create a new Token representing the given text, and belonging to the
# given group.
def initialize( text, group, instruction = :none )
super text
@group = group
@instruction = instruction
end
end
# The base class of all tokenizers. It sets up the scanner and manages the
# looping until all tokens have been extracted. It also provides convenience
# methods to make sure adjacent tokens of identical groups are returned as
# a single token.
class Tokenizer
# The current group being processed by the tokenizer
attr_reader :group
# The current chunk of text being accumulated
attr_reader :chunk
# Start tokenizing. This sets up the state in preparation for tokenization,
# such as creating a new scanner for the text and saving the callback block.
# The block will be invoked for each token extracted.
def start( text, &block )
@chunk = ""
@group = :normal
@callback = block
@text = StringScanner.new( text )
setup
end
# Subclasses may override this method to provide implementation-specific
# setup logic.
def setup
end
# Finish tokenizing. This flushes the buffer, yielding any remaining text
# to the client.
def finish
start_group nil
teardown
end
# Subclasses may override this method to provide implementation-specific
# teardown logic.
def teardown
end
# Subclasses must implement this method, which is called for each iteration
# of the tokenization process. This method may extract multiple tokens.
def step
raise NotImplementedError, "subclasses must implement #step"
end
# Begins tokenizing the given text, calling #step until the text has been
# exhausted.
def tokenize( text, &block )
start text, &block
step until @text.eos?
finish
end
# Specify a set of tokenizer-specific options. Each tokenizer may (or may
# not) publish options; if it does, those options may be used to specify
# optional behavior.
def set( opts={} )
( @options ||= Hash.new ).update opts
end
# Get the value of the specified option.
def option(opt)
@options ? @options[opt] : nil
end
private
EOL = /(?=\r\n?|\n|$)/
# A convenience for delegating method calls to the scanner.
def self.delegate( sym )
define_method( sym ) { |*a| @text.__send__( sym, *a ) }
end
delegate :bol?
delegate :eos?
delegate :scan
delegate :scan_until
delegate :check
delegate :check_until
delegate :getch
delegate :matched
delegate :pre_match
delegate :peek
delegate :pos
# Access the n-th subgroup from the most recent match.
def subgroup(n)
@text[n]
end
# Append the given data to the currently active chunk.
def append( data )
@chunk << data
end
# Request that a new group be started. If the current group is the same
# as the group being requested, a new group will not be created. If a new
# group is created and the current chunk is not empty, the chunk's
# contents will be yielded to the client as a token, and then cleared.
#
# After the new group is started, if +data+ is non-nil it will be appended
# to the chunk.
def start_group( gr, data=nil )
flush_chunk if gr != @group
@group = gr
@chunk << data if data
end
def start_region( gr, data=nil )
flush_chunk
@group = gr
@callback.call( Token.new( data||"", @group, :region_open ) )
end
def end_region( gr, data=nil )
flush_chunk
@group = gr
@callback.call( Token.new( data||"", @group, :region_close ) )
end
def flush_chunk
@callback.call( Token.new( @chunk, @group ) ) unless @chunk.empty?
@chunk = ""
end
def subtokenize( syntax, text )
tokenizer = Syntax.load( syntax )
tokenizer.set @options if @options
flush_chunk
tokenizer.tokenize( text, &@callback )
end
end
end
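Aside: the subclassing contract above can be illustrated with a short hypothetical tokenizer (not part of the commit). #step consumes input through the scanner delegates and classifies it with start_group, while the inherited #tokenize drives the loop and merges adjacent same-group chunks:

require 'syntax'

# Hypothetical example: runs of capital letters become :constant tokens.
class Shouty < Syntax::Tokenizer
  def step
    if scan( /[A-Z]+/ )
      start_group :constant, matched
    else
      start_group :normal, getch
    end
  end
end

Shouty.new.tokenize( "abc DEF" ) { |tok| puts "#{tok.group}: #{tok.inspect}" }
# normal: "abc "
# constant: "DEF"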

vendor/plugins/syntax/lib/syntax/convertors/abstract.rb vendored Normal file

@@ -0,0 +1,27 @@
require 'syntax'
module Syntax
module Convertors
# The abstract ancestor class for all convertors. It implements a few
# convenience methods to provide a common interface for all convertors.
class Abstract
# A reference to the tokenizer used by this convertor.
attr_reader :tokenizer
# A convenience method for instantiating a new convertor for a
# specific syntax.
def self.for_syntax( syntax )
new( Syntax.load( syntax ) )
end
# Creates a new convertor that uses the given tokenizer.
def initialize( tokenizer )
@tokenizer = tokenizer
end
end
end
end
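Aside: a hypothetical subclass (not in this commit) showing what the for_syntax convenience buys — the subclass only supplies a conversion method and receives a ready tokenizer:

require 'syntax/convertors/abstract'

class Tagged < Syntax::Convertors::Abstract
  # Render each token as "group(text)", just to show the plumbing.
  def convert( text )
    out = ""
    @tokenizer.tokenize( text ) { |tok| out << "#{tok.group}(#{tok})" }
    out
  end
end

puts Tagged.for_syntax( "ruby" ).convert( "x = 1" )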

vendor/plugins/syntax/lib/syntax/convertors/html.rb vendored Normal file

@@ -0,0 +1,51 @@
require 'syntax/convertors/abstract'
module Syntax
module Convertors
# A simple class for converting a text into HTML.
class HTML < Abstract
# Converts the given text to HTML, using spans to represent token groups
# of any type but <tt>:normal</tt> (which is always unhighlighted). If
# +pre+ is +true+, the html is automatically wrapped in pre tags.
def convert( text, pre=true )
html = ""
html << "<pre>" if pre
regions = []
@tokenizer.tokenize( text ) do |tok|
value = html_escape(tok)
case tok.instruction
when :region_close then
regions.pop
html << "</span>"
when :region_open then
regions.push tok.group
html << "<span class=\"#{tok.group}\">#{value}"
else
if tok.group == ( regions.last || :normal )
html << value
else
html << "<span class=\"#{tok.group}\">#{value}</span>"
end
end
end
html << "</span>" while regions.pop
html << "</pre>" if pre
html
end
private
# Replaces some characters with their corresponding HTML entities.
def html_escape( string )
string.gsub( /&/, "&amp;" ).
gsub( /</, "&lt;" ).
gsub( />/, "&gt;" ).
gsub( /"/, "&quot;" )
end
end
end
end
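Aside: typical use of the HTML convertor (not part of the commit; the exact markup depends on the tokenizer, so the comment below is only indicative):

require 'syntax/convertors/html'

convertor = Syntax::Convertors::HTML.for_syntax( "ruby" )
puts convertor.convert( 'def add(a, b) a + b end' )
# a <pre> wrapper around <span class="keyword">, <span class="method">, ... spans;
# pass false as the second argument to omit the <pre> tags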

vendor/plugins/syntax/lib/syntax/lang/ruby.rb vendored Normal file

@@ -0,0 +1,317 @@
require 'syntax'
module Syntax
# A tokenizer for the Ruby language. It recognizes all common syntax
# (and some less common syntax) but because it is not a true lexer, it
# will make mistakes on some ambiguous cases.
class Ruby < Tokenizer
# The list of all identifiers recognized as keywords.
KEYWORDS =
%w{if then elsif else end begin do rescue ensure while for
class module def yield raise until unless and or not when
case super undef break next redo retry in return alias
defined?}
# Perform ruby-specific setup
def setup
@selector = false
@allow_operator = false
@heredocs = []
end
# Step through a single iteration of the tokenization process.
def step
case
when bol? && check( /=begin/ )
start_group( :comment, scan_until( /^=end#{EOL}/ ) )
when bol? && check( /__END__#{EOL}/ )
start_group( :comment, scan_until( /\Z/ ) )
else
case
when check( /def\s+/ )
start_group :keyword, scan( /def\s+/ )
start_group :method, scan_until( /(?=[;(\s]|#{EOL})/ )
when check( /class\s+/ )
start_group :keyword, scan( /class\s+/ )
start_group :class, scan_until( /(?=[;\s<]|#{EOL})/ )
when check( /module\s+/ )
start_group :keyword, scan( /module\s+/ )
start_group :module, scan_until( /(?=[;\s]|#{EOL})/ )
when check( /::/ )
start_group :punct, scan(/::/)
when check( /:"/ )
start_group :symbol, scan(/:/)
scan_delimited_region :symbol, :symbol, "", true
@allow_operator = true
when check( /:'/ )
start_group :symbol, scan(/:/)
scan_delimited_region :symbol, :symbol, "", false
@allow_operator = true
when scan( /:[_a-zA-Z@$][$@\w]*[=!?]?/ )
start_group :symbol, matched
@allow_operator = true
when scan( /\?(\\[^\n\r]|[^\\\n\r\s])/ )
start_group :char, matched
@allow_operator = true
when check( /(__FILE__|__LINE__|true|false|nil|self)[?!]?/ )
if @selector || matched[-1] == ?? || matched[-1] == ?!
start_group :ident,
scan(/(__FILE__|__LINE__|true|false|nil|self)[?!]?/)
else
start_group :constant,
scan(/(__FILE__|__LINE__|true|false|nil|self)/)
end
@selector = false
@allow_operator = true
when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
start_group :number, matched
@allow_operator = true
else
case peek(2)
when "%r"
scan_delimited_region :punct, :regex, scan( /../ ), true
@allow_operator = true
when "%w", "%q"
scan_delimited_region :punct, :string, scan( /../ ), false
@allow_operator = true
when "%s"
scan_delimited_region :punct, :symbol, scan( /../ ), false
@allow_operator = true
when "%W", "%Q", "%x"
scan_delimited_region :punct, :string, scan( /../ ), true
@allow_operator = true
when /%[^\sa-zA-Z0-9]/
scan_delimited_region :punct, :string, scan( /./ ), true
@allow_operator = true
when "<<"
saw_word = ( chunk[-1,1] =~ /[\w!?]/ )
start_group :punct, scan( /<</ )
if saw_word
@allow_operator = false
return
end
float_right = scan( /-/ )
append "-" if float_right
if ( type = scan( /['"]/ ) )
append type
delim = scan_until( /(?=#{type})/ )
if delim.nil?
append scan_until( /\Z/ )
return
end
else
delim = scan( /\w+/ ) or return
end
start_group :constant, delim
start_group :punct, scan( /#{type}/ ) if type
@heredocs << [ float_right, type, delim ]
@allow_operator = true
else
case peek(1)
when /[\n\r]/
unless @heredocs.empty?
scan_heredoc(*@heredocs.shift)
else
start_group :normal, scan( /\s+/ )
end
@allow_operator = false
when /\s/
start_group :normal, scan( /\s+/ )
when "#"
start_group :comment, scan( /#[^\n\r]*/ )
when /[A-Z]/
start_group @selector ? :ident : :constant, scan( /\w+/ )
@allow_operator = true
when /[a-z_]/
word = scan( /\w+[?!]?/ )
if !@selector && KEYWORDS.include?( word )
start_group :keyword, word
@allow_operator = false
else
start_group :ident, word
@allow_operator = true
end
@selector = false
when /\d/
start_group :number,
scan( /[\d_]+(\.[\d_]+)?([eE][\d_]+)?/ )
@allow_operator = true
when '"'
scan_delimited_region :punct, :string, "", true
@allow_operator = true
when '/'
if @allow_operator
start_group :punct, scan(%r{/})
@allow_operator = false
else
scan_delimited_region :punct, :regex, "", true
@allow_operator = true
end
when "'"
scan_delimited_region :punct, :string, "", false
@allow_operator = true
when "."
dots = scan( /\.{1,3}/ )
start_group :punct, dots
@selector = ( dots.length == 1 )
when /[@]/
start_group :attribute, scan( /@{1,2}\w*/ )
@allow_operator = true
when /[$]/
start_group :global, scan(/\$/)
start_group :global, scan( /\w+|./ ) if check(/./)
@allow_operator = true
when /[-!?*\/+=<>(\[\{}:;,&|%]/
start_group :punct, scan(/./)
@allow_operator = false
when /[)\]]/
start_group :punct, scan(/./)
@allow_operator = true
else
# all else just falls through this, to prevent
# infinite loops...
append getch
end
end
end
end
end
private
# Scan a delimited region of text. This handles the simple cases (strings
# delimited with quotes) as well as the more complex cases of %-strings
# and here-documents.
#
# * +delim_group+ is the group to use to classify the delimiters of the
# region
# * +inner_group+ is the group to use to classify the contents of the
# region
# * +starter+ is the text to use as the starting delimiter
# * +exprs+ is a boolean flag indicating whether the region is an
# interpolated string or not
# * +delim+ is the text to use as the delimiter of the region. If +nil+,
# the next character will be treated as the delimiter.
# * +heredoc+ is either +false+, meaning the region is not a heredoc, or
# <tt>:flush</tt> (meaning the delimiter must be flushed left), or
# <tt>:float</tt> (meaning the delimiter doesn't have to be flush left).
def scan_delimited_region( delim_group, inner_group, starter, exprs,
delim=nil, heredoc=false )
# begin
if !delim
start_group delim_group, starter
delim = scan( /./ )
append delim
delim = case delim
when '{' then '}'
when '(' then ')'
when '[' then ']'
when '<' then '>'
else delim
end
end
start_region inner_group
items = "\\\\|"
if heredoc
items << "(^"
items << '\s*' if heredoc == :float
items << "#{Regexp.escape(delim)}\s*?)#{EOL}"
else
items << "#{Regexp.escape(delim)}"
end
items << "|#(\\$|@@?|\\{)" if exprs
items = Regexp.new( items )
loop do
p = pos
match = scan_until( items )
if match.nil?
start_group inner_group, scan_until( /\Z/ )
break
else
text = pre_match[p..-1]
start_group inner_group, text if text.length > 0
case matched.strip
when "\\"
unless exprs
case peek(1)
when "'"
scan(/./)
start_group :escape, "\\'"
when "\\"
scan(/./)
start_group :escape, "\\\\"
else
start_group inner_group, "\\"
end
else
start_group :escape, "\\"
c = getch
append c
case c
when 'x'
append scan( /[a-fA-F0-9]{1,2}/ )
when /[0-7]/
append scan( /[0-7]{0,2}/ )
end
end
when delim
end_region inner_group
start_group delim_group, matched
break
when /^#/
do_highlight = (option(:expressions) == :highlight)
start_region :expr if do_highlight
start_group :expr, matched
case matched[1]
when ?{
depth = 1
content = ""
while depth > 0
p = pos
c = scan_until( /[\{}]/ )
if c.nil?
content << scan_until( /\Z/ )
break
else
depth += ( matched == "{" ? 1 : -1 )
content << pre_match[p..-1]
content << matched if depth > 0
end
end
if do_highlight
subtokenize "ruby", content
start_group :expr, "}"
else
append content + "}"
end
when ?$, ?@
append scan( /\w+/ )
end
end_region :expr if do_highlight
else raise "unexpected match on #{matched}"
end
end
end
end
# Scan a heredoc beginning at the current position.
#
# * +float+ indicates whether the delimiter may be floated to the right
# * +type+ is +nil+, a single quote, or a double quote
# * +delim+ is the delimiter to look for
def scan_heredoc(float, type, delim)
scan_delimited_region( :constant, :string, "", type != "'",
delim, float ? :float : :flush )
end
end
SYNTAX["ruby"] = Ruby
end
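Aside: a short sketch of this tokenizer in action (not part of the commit); the groups follow the classifications above:

require 'syntax'

code = "def add(a, b)\n  a + b  # sum\nend\n"
Syntax.load( "ruby" ).tokenize( code ) do |tok|
  puts "#{tok.group}: #{tok.inspect}" unless tok.group == :normal
end
# keyword: "def ", method: "add", punct: "(", ident: "a", ..., comment: "# sum"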

vendor/plugins/syntax/lib/syntax/lang/xml.rb vendored Normal file

@@ -0,0 +1,108 @@
require 'syntax'
module Syntax
# A simple implementation of an XML lexer. It handles most cases. It is
# not a validating lexer, meaning it will happily process invalid XML without
# complaining.
class XML < Tokenizer
# Initialize the lexer.
def setup
@in_tag = false
end
# Step through a single iteration of the tokenization process. This may
# yield many tokens, or none at all.
def step
start_group :normal, matched if scan( /\s+/ )
if @in_tag
case
when scan( /([-\w]+):([-\w]+)/ )
start_group :namespace, subgroup(1)
start_group :punct, ":"
start_group :attribute, subgroup(2)
when scan( /\d+/ )
start_group :number, matched
when scan( /[-\w]+/ )
start_group :attribute, matched
when scan( %r{[/?]?>} )
@in_tag = false
start_group :punct, matched
when scan( /=/ )
start_group :punct, matched
when scan( /["']/ )
scan_string matched
else
append getch
end
elsif ( text = scan_until( /(?=[<&])/ ) )
start_group :normal, text unless text.empty?
if scan(/<!--.*?(-->|\Z)/m)
start_group :comment, matched
else
case peek(1)
when "<"
start_group :punct, getch
case peek(1)
when "?"
append getch
when "/"
append getch
when "!"
append getch
end
start_group :normal, matched if scan( /\s+/ )
if scan( /([-\w]+):([-\w]+)/ )
start_group :namespace, subgroup(1)
start_group :punct, ":"
start_group :tag, subgroup(2)
elsif scan( /[-\w]+/ )
start_group :tag, matched
end
@in_tag = true
when "&"
if scan( /&\S{1,10};/ )
start_group :entity, matched
else
start_group :normal, scan( /&/ )
end
end
end
else
append scan_until( /\Z/ )
end
end
private
# Scan the string starting at the current position, with the given
# delimiter character.
def scan_string( delim )
start_group :punct, delim
match = /(?=[&\\]|#{delim})/
loop do
break unless ( text = scan_until( match ) )
start_group :string, text unless text.empty?
case peek(1)
when "&"
if scan( /&\S{1,10};/ )
start_group :entity, matched
else
start_group :string, getch
end
when "\\"
start_group :string, getch
append getch || ""
when delim
start_group :punct, getch
break
end
end
end
end
SYNTAX["xml"] = XML
end
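Aside: a sketch of the XML lexer at work (not part of the commit):

require 'syntax'

Syntax.load( "xml" ).tokenize( '<a href="x">hi &amp; bye</a>' ) do |tok|
  puts "#{tok.group}: #{tok.inspect}"
end
# yields punct/tag/attribute/string tokens for the markup; the &amp; inside
# the text run is classified :entity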

vendor/plugins/syntax/lib/syntax/lang/yaml.rb vendored Normal file

@@ -0,0 +1,105 @@
require 'syntax'
module Syntax
# A simple implementation of a YAML lexer. It handles most cases. It is
# not a validating lexer.
class YAML < Tokenizer
# Step through a single iteration of the tokenization process. This may
# yield many tokens, or none at all.
def step
if bol?
case
when scan(/---(\s*.+)?$/)
start_group :document, matched
when scan(/(\s*)([a-zA-Z][-\w]*)(\s*):/)
start_group :normal, subgroup(1)
start_group :key, subgroup(2)
start_group :normal, subgroup(3)
start_group :punct, ":"
when scan(/(\s*)-/)
start_group :normal, subgroup(1)
start_group :punct, "-"
when scan(/\s*$/)
start_group :normal, matched
when scan(/#.*$/)
start_group :comment, matched
else
append getch
end
else
case
when scan(/[\n\r]+/)
start_group :normal, matched
when scan(/[ \t]+/)
start_group :normal, matched
when scan(/!+(.*?^)?\S+/)
start_group :type, matched
when scan(/&\S+/)
start_group :anchor, matched
when scan(/\*\S+/)
start_group :ref, matched
when scan(/\d\d:\d\d:\d\d/)
start_group :time, matched
when scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/)
start_group :date, matched
when scan(/['"]/)
start_group :punct, matched
scan_string matched
when scan(/:\w+/)
start_group :symbol, matched
when scan(/[:]/)
start_group :punct, matched
when scan(/#.*$/)
start_group :comment, matched
when scan(/>-?/)
start_group :punct, matched
start_group :normal, scan(/.*$/)
append getch until eos? || bol?
return if eos?
indent = check(/ */)
start_group :string
loop do
line = check_until(/[\n\r]|\Z/)
break if line.nil?
if line.chomp.length > 0
this_indent = line.chomp.match( /^\s*/ )[0]
break if this_indent.length < indent.length
end
append scan_until(/[\n\r]|\Z/)
end
else
start_group :normal, scan_until(/(?=$|#)/)
end
end
end
private
def scan_string( delim )
regex = /(?=[#{delim=="'" ? "" : "\\\\"}#{delim}])/
loop do
text = scan_until( regex )
if text.nil?
start_group :string, scan_until( /\Z/ )
break
else
start_group :string, text unless text.empty?
end
case peek(1)
when "\\"
start_group :expr, scan(/../)
else
start_group :punct, getch
break
end
end
end
end
SYNTAX["yaml"] = YAML
end
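Aside: a sketch of the YAML lexer (not part of the commit):

require 'syntax'

Syntax.load( "yaml" ).tokenize( "---\nname: Instiki  # a wiki\n" ) do |tok|
  puts "#{tok.group}: #{tok.inspect}"
end
# document: "---", key: "name", punct: ":", comment: "# a wiki"; whitespace
# and other runs come through as :normal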

vendor/plugins/syntax/lib/syntax/version.rb vendored Normal file

@@ -0,0 +1,9 @@
module Syntax
module Version
MAJOR=1
MINOR=0
TINY=0
STRING=[MAJOR,MINOR,TINY].join('.')
end
end
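Aside: the constants assembled above resolve to a dotted version string:

require 'syntax/version'

puts Syntax::Version::STRING  # => "1.0.0"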