Bring up to current.

This commit is contained in:
Jacques Distler 2007-01-22 08:36:51 -06:00
parent 69b62b6f33
commit b19e1e4f47
71 changed files with 8305 additions and 39 deletions

317
lib/syntax/lang/ruby.rb Normal file
View file

@ -0,0 +1,317 @@
require 'syntax'
module Syntax
# A tokenizer for the Ruby language. It recognizes all common syntax
# (and some less common syntax) but because it is not a true lexer, it
# will make mistakes on some ambiguous cases.
class Ruby < Tokenizer
# The list of all identifiers recognized as keywords.
# Frozen so that the shared keyword table cannot be mutated by accident at
# runtime; #step only ever reads it (via include?).
KEYWORDS =
  %w{if then elsif else end begin do rescue ensure while for
     class module def yield raise until unless and or not when
     case super undef break next redo retry in return alias
     defined?}.freeze
# Perform ruby-specific setup
def setup
  # Queue of [float, type, delim] heredoc descriptors; #step pushes one when
  # it sees a "<<" introducer and shifts one off at the next line break.
  @heredocs = []
  # Both lexer flags start cleared. @selector is raised by #step after a
  # single "."; @allow_operator decides whether "/" is read as an operator
  # or as the start of a regex.
  @selector = @allow_operator = false
end
# Step through a single iteration of the tokenization process.
def step
  # Classifies the next construct at the scanner position and hands it to
  # start_group/append. Two pieces of state steer the ambiguous cases:
  #   @selector       - true right after a single "."; the following word is
  #                     then reported as :ident even if it looks like a
  #                     keyword or constant.
  #   @allow_operator - true when the previous token can end an expression,
  #                     so that a following "/" is an operator, not a regex.
  case
  when bol? && check( /=begin/ )
    start_group( :comment, scan_until( /^=end#{EOL}/ ) )
  when bol? && check( /__END__#{EOL}/ )
    # everything after __END__ is reported as a comment
    start_group( :comment, scan_until( /\Z/ ) )
  else
    case
    when check( /def\s+/ )
      start_group :keyword, scan( /def\s+/ )
      start_group :method, scan_until( /(?=[;(\s]|#{EOL})/ )
    when check( /class\s+/ )
      start_group :keyword, scan( /class\s+/ )
      start_group :class, scan_until( /(?=[;\s<]|#{EOL})/ )
    when check( /module\s+/ )
      start_group :keyword, scan( /module\s+/ )
      start_group :module, scan_until( /(?=[;\s]|#{EOL})/ )
    when check( /::/ )
      start_group :punct, scan(/::/)
    when check( /:"/ )
      start_group :symbol, scan(/:/)
      scan_delimited_region :symbol, :symbol, "", true
      @allow_operator = true
    when check( /:'/ )
      start_group :symbol, scan(/:/)
      scan_delimited_region :symbol, :symbol, "", false
      @allow_operator = true
    when scan( /:[_a-zA-Z@$][$@\w]*[=!?]?/ )
      start_group :symbol, matched
      @allow_operator = true
    when scan( /\?(\\[^\n\r]|[^\\\n\r\s])/ )
      start_group :char, matched
      @allow_operator = true
    # The (?!\w) guard keeps identifiers that merely start with one of these
    # words (e.g. "selfish", "nil_value") from being split into a constant
    # followed by an identifier; they fall through to the word branch below.
    when check( /(__FILE__|__LINE__|true|false|nil|self)(?!\w)[?!]?/ )
      if @selector || matched[-1] == ?? || matched[-1] == ?!
        start_group :ident,
          scan(/(__FILE__|__LINE__|true|false|nil|self)[?!]?/)
      else
        start_group :constant,
          scan(/(__FILE__|__LINE__|true|false|nil|self)/)
      end
      @selector = false
      @allow_operator = true
    when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
      start_group :number, matched
      @allow_operator = true
    else
      case peek(2)
      when "%r"
        scan_delimited_region :punct, :regex, scan( /../ ), true
        @allow_operator = true
      when "%w", "%q"
        scan_delimited_region :punct, :string, scan( /../ ), false
        @allow_operator = true
      when "%s"
        scan_delimited_region :punct, :symbol, scan( /../ ), false
        @allow_operator = true
      when "%W", "%Q", "%x"
        scan_delimited_region :punct, :string, scan( /../ ), true
        @allow_operator = true
      when /%[^\sa-zA-Z0-9]/
        scan_delimited_region :punct, :string, scan( /./ ), true
        @allow_operator = true
      when "<<"
        # "<<" directly after a word character (or ! / ?) is treated as the
        # shift/append operator rather than a heredoc introducer.
        saw_word = ( chunk[-1,1] =~ /[\w!?]/ )
        start_group :punct, scan( /<</ )
        if saw_word
          @allow_operator = false
          return
        end
        float_right = scan( /-/ )
        append "-" if float_right
        if ( type = scan( /['"]/ ) )
          append type
          delim = scan_until( /(?=#{type})/ )
          if delim.nil?
            # unterminated quoted heredoc tag: emit the rest and give up
            append scan_until( /\Z/ )
            return
          end
        else
          delim = scan( /\w+/ ) or return
        end
        start_group :constant, delim
        start_group :punct, scan( /#{type}/ ) if type
        # the heredoc body is scanned later, when the line break is reached
        @heredocs << [ float_right, type, delim ]
        @allow_operator = true
      else
        case peek(1)
        when /[\n\r]/
          unless @heredocs.empty?
            scan_heredoc(*@heredocs.shift)
          else
            start_group :normal, scan( /\s+/ )
          end
          @allow_operator = false
        when /\s/
          start_group :normal, scan( /\s+/ )
        when "#"
          start_group :comment, scan( /#[^\n\r]*/ )
        when /[A-Z]/
          start_group @selector ? :ident : :constant, scan( /\w+/ )
          @allow_operator = true
        when /[a-z_]/
          word = scan( /\w+[?!]?/ )
          if !@selector && KEYWORDS.include?( word )
            start_group :keyword, word
            @allow_operator = false
          else
            # Was a condition-less "elsif", which made the start_group call
            # itself the branch condition and its return value decide
            # whether @allow_operator was set.
            start_group :ident, word
            @allow_operator = true
          end
          @selector = false
        when /\d/
          start_group :number,
            scan( /[\d_]+(\.[\d_]+)?([eE][\d_]+)?/ )
          @allow_operator = true
        when '"'
          scan_delimited_region :punct, :string, "", true
          @allow_operator = true
        when '/'
          if @allow_operator
            start_group :punct, scan(%r{/})
            @allow_operator = false
          else
            scan_delimited_region :punct, :regex, "", true
            @allow_operator = true
          end
        when "'"
          scan_delimited_region :punct, :string, "", false
          @allow_operator = true
        when "."
          dots = scan( /\.{1,3}/ )
          start_group :punct, dots
          # only a lone "." is a method selector; ".." and "..." are ranges
          @selector = ( dots.length == 1 )
        when /[@]/
          start_group :attribute, scan( /@{1,2}\w*/ )
          @allow_operator = true
        when /[$]/
          start_group :global, scan(/\$/)
          start_group :global, scan( /\w+|./ ) if check(/./)
          @allow_operator = true
        when /[-!?*\/+=<>(\[\{}:;,&|%]/
          start_group :punct, scan(/./)
          @allow_operator = false
        when /[)\]]/
          start_group :punct, scan(/./)
          @allow_operator = true
        else
          # all else just falls through this, to prevent
          # infinite loops...
          append getch
        end
      end
    end
  end
end
private
# Scan a delimited region of text. This handles the simple cases (strings
# delimited with quotes) as well as the more complex cases of %-strings
# and here-documents.
#
# * +delim_group+ is the group to use to classify the delimiters of the
# region
# * +inner_group+ is the group to use to classify the contents of the
# region
# * +starter+ is the text to use as the starting delimiter
# * +exprs+ is a boolean flag indicating whether the region is an
# interpolated string or not
# * +delim+ is the text to use as the delimiter of the region. If +nil+,
# the next character will be treated as the delimiter.
# * +heredoc+ is either +false+, meaning the region is not a heredoc, or
# <tt>:flush</tt> (meaning the delimiter must be flushed left), or
# <tt>:float</tt> (meaning the delimiter doesn't have to be flush left).
def scan_delimited_region( delim_group, inner_group, starter, exprs,
                           delim=nil, heredoc=false )
  # Emits the opening delimiter (unless +delim+ was supplied), then the
  # contents of the region up to and including the closing delimiter.
  # Scanner results that may be nil are never passed to append.
  if !delim
    start_group delim_group, starter
    delim = scan( /./ )
    # "." matches neither a newline nor end of input; without a delimiter
    # character there is no region to scan.
    return if delim.nil?
    append delim
    # bracketing openers are closed by their counterpart
    delim = case delim
      when '{' then '}'
      when '(' then ')'
      when '[' then ']'
      when '<' then '>'
      else delim
    end
  end
  start_region inner_group
  # Build the pattern of everything that interrupts plain content: a
  # backslash, the closing delimiter, and (for interpolated regions) the
  # start of an interpolation.
  items = "\\\\|"
  if heredoc
    items << "(^"
    items << '\s*' if heredoc == :float
    # NOTE: in this double-quoted literal "\s" is a space character, so only
    # trailing spaces are tolerated after the heredoc terminator.
    items << "#{Regexp.escape(delim)}\s*?)#{EOL}"
  else
    items << "#{Regexp.escape(delim)}"
  end
  items << "|#(\\$|@@?|\\{)" if exprs
  items = Regexp.new( items )
  loop do
    p = pos
    match = scan_until( items )
    if match.nil?
      # unterminated region: everything that is left belongs to it
      start_group inner_group, scan_until( /\Z/ )
      break
    else
      # plain content between the previous position and this match
      text = pre_match[p..-1]
      start_group inner_group, text if text.length > 0
      case matched.strip
      when "\\"
        unless exprs
          # non-interpolated regions only know \' and \\ as escapes
          case peek(1)
          when "'"
            scan(/./)
            start_group :escape, "\\'"
          when "\\"
            scan(/./)
            start_group :escape, "\\\\"
          else
            start_group inner_group, "\\"
          end
        else
          start_group :escape, "\\"
          c = getch
          # getch is nil when the backslash is the last character of input
          append c if c
          case c
          when 'x'
            # "\x" need not be followed by hex digits
            append( scan( /[a-fA-F0-9]{1,2}/ ) || "" )
          when /[0-7]/
            append scan( /[0-7]{0,2}/ )
          end
        end
      when delim
        end_region inner_group
        start_group delim_group, matched
        break
      when /^#/
        do_highlight = (option(:expressions) == :highlight)
        start_region :expr if do_highlight
        start_group :expr, matched
        case matched[1]
        when ?{
          # collect the interpolated expression up to its matching "}"
          depth = 1
          content = ""
          while depth > 0
            p = pos
            c = scan_until( /[\{}]/ )
            if c.nil?
              content << scan_until( /\Z/ )
              break
            else
              depth += ( matched == "{" ? 1 : -1 )
              content << pre_match[p..-1]
              content << matched if depth > 0
            end
          end
          if do_highlight
            subtokenize "ruby", content
            start_group :expr, "}"
          else
            append content + "}"
          end
        when ?$, ?@
          # "#$" / "#@" may be followed by a non-word character (e.g. "#$!")
          append( scan( /\w+/ ) || "" )
        end
        end_region :expr if do_highlight
      else raise "unexpected match on #{matched}"
      end
    end
  end
end
# Scan a heredoc beginning at the current position.
#
# * +float+ indicates whether the delimiter may be floated to the right
# * +type+ is +nil+, a single quote, or a double quote
# * +delim+ is the delimiter to look for
def scan_heredoc(float, type, delim)
  # Only a single-quoted heredoc tag switches interpolation off.
  interpolated = ( type != "'" )
  # A floated ("<<-") terminator may be indented; otherwise it must be
  # flush left.
  placement = float ? :float : :flush
  scan_delimited_region( :constant, :string, "", interpolated,
    delim, placement )
end
end
SYNTAX["ruby"] = Ruby
end

108
lib/syntax/lang/xml.rb Normal file
View file

@ -0,0 +1,108 @@
require 'syntax'
module Syntax
# A simple implementation of an XML lexer. It handles most cases. It is
# not a validating lexer, meaning it will happily process invalid XML without
# complaining.
class XML < Tokenizer
# Initialize the lexer.
def setup
# Scanning starts outside of any tag. #step sets this flag once it has read
# a tag's opening "<" and name, and clears it again at the closing ">".
@in_tag = false
end
# Step through a single iteration of the tokenization process. This will
# yield (potentially) many tokens, and possibly zero tokens.
def step
  # Whitespace is always reported as :normal, inside and outside of tags.
  start_group :normal, matched if scan( /\s+/ )
  if @in_tag
    # Inside a tag: attribute names, values and the closing punctuation.
    case
    when scan( /([-\w]+):([-\w]+)/ )
      start_group :namespace, subgroup(1)
      start_group :punct, ":"
      start_group :attribute, subgroup(2)
    when scan( /\d+/ )
      start_group :number, matched
    when scan( /[-\w]+/ )
      start_group :attribute, matched
    when scan( %r{[/?]?>} )
      @in_tag = false
      start_group :punct, matched
    when scan( /=/ )
      start_group :punct, matched
    when scan( /["']/ )
      scan_string matched
    else
      # Consume one character so the scanner always advances. getch is nil
      # when the whitespace scan above already reached the end of input
      # (unterminated tag), and nil must not be appended.
      ch = getch
      append ch if ch
    end
  elsif ( text = scan_until( /(?=[<&])/ ) )
    # Character data up to the next markup character.
    start_group :normal, text unless text.empty?
    if scan(/<!--.*?(-->|\Z)/m)
      start_group :comment, matched
    else
      case peek(1)
      when "<"
        start_group :punct, getch
        case peek(1)
        when "?", "/", "!"
          # "<?", "</" and "<!" stay part of the opening punctuation
          append getch
        end
        start_group :normal, matched if scan( /\s+/ )
        if scan( /([-\w]+):([-\w]+)/ )
          start_group :namespace, subgroup(1)
          start_group :punct, ":"
          start_group :tag, subgroup(2)
        elsif scan( /[-\w]+/ )
          start_group :tag, matched
        end
        @in_tag = true
      when "&"
        if scan( /&\S{1,10};/ )
          start_group :entity, matched
        else
          start_group :normal, scan( /&/ )
        end
      end
    end
  else
    # No markup is left: the remainder is character data. It is opened as
    # its own :normal group so it is not glued onto the previous token
    # (e.g. the ">" of the last tag).
    start_group :normal, scan_until( /\Z/ )
  end
end
private
# Scan the string starting at the current position, with the given
# delimiter character.
def scan_string( delim )
  # Emits the opening quote, then alternates between plain :string text and
  # the three things that can interrupt it: an entity, a backslash pair, or
  # the closing quote. If the closing quote never appears, scanning stops
  # where the last interruption ended.
  start_group :punct, delim
  stop_at = /(?=[&\\]|#{delim})/
  while ( piece = scan_until( stop_at ) )
    start_group( :string, piece ) if piece.length > 0
    upcoming = peek(1)
    if upcoming == "&"
      # a well-formed entity gets its own group; a bare "&" is just text
      entity = scan( /&\S{1,10};/ )
      if entity
        start_group :entity, matched
      else
        start_group :string, getch
      end
    elsif upcoming == "\\"
      # the backslash and whatever follows it stay part of the string
      start_group :string, getch
      append( getch || "" )
    elsif upcoming == delim
      start_group :punct, getch
      break
    end
  end
end
end
SYNTAX["xml"] = XML
end

105
lib/syntax/lang/yaml.rb Normal file
View file

@ -0,0 +1,105 @@
require 'syntax'
module Syntax
# A simple implementation of a YAML lexer. It handles most cases. It is
# not a validating lexer.
class YAML < Tokenizer
# Step through a single iteration of the tokenization process. This will
# yield (potentially) many tokens, and possibly zero tokens.
def step
  if bol?
    # Constructs that are only recognised at the start of a line.
    case
    when scan(/---(\s*.+)?$/)
      start_group :document, matched
    when scan(/(\s*)([a-zA-Z][-\w]*)(\s*):/)
      start_group :normal, subgroup(1)
      start_group :key, subgroup(2)
      start_group :normal, subgroup(3)
      start_group :punct, ":"
    when scan(/(\s*)-/)
      start_group :normal, subgroup(1)
      start_group :punct, "-"
    when scan(/\s*$/)
      blank = matched
      start_group :normal, blank
      # The pattern can match zero characters (an empty line followed by a
      # line that is neither a key nor a list item). Consume the line break
      # so the scanner advances instead of matching here forever.
      append getch if blank.empty? && !eos?
    when scan(/#.*$/)
      start_group :comment, matched
    else
      append getch
    end
  else
    case
    when scan(/[\n\r]+/)
      start_group :normal, matched
    when scan(/[ \t]+/)
      start_group :normal, matched
    when scan(/!+(.*?^)?\S+/)
      start_group :type, matched
    when scan(/&\S+/)
      start_group :anchor, matched
    when scan(/\*\S+/)
      start_group :ref, matched
    when scan(/\d\d:\d\d:\d\d/)
      start_group :time, matched
    when scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/)
      start_group :date, matched
    when scan(/['"]/)
      start_group :punct, matched
      scan_string matched
    when scan(/:\w+/)
      start_group :symbol, matched
    when scan(/[:]/)
      start_group :punct, matched
    when scan(/#.*$/)
      start_group :comment, matched
    when scan(/>-?/)
      # Folded block scalar: the rest of the indicator line is :normal, the
      # following lines are :string for as long as they are blank or
      # indented at least as far as the first one.
      start_group :punct, matched
      start_group :normal, scan(/.*$/)
      append getch until eos? || bol?
      return if eos?
      indent = check(/ */)
      start_group :string
      loop do
        # check_until matches the empty string at end of input instead of
        # returning nil, so the end has to be tested explicitly or the loop
        # never terminates when the scalar is the last thing in the input.
        break if eos?
        line = check_until(/[\n\r]|\Z/)
        break if line.nil?
        if line.chomp.length > 0
          this_indent = line.chomp.match( /^\s*/ )[0]
          break if this_indent.length < indent.length
        end
        append scan_until(/[\n\r]|\Z/)
      end
    else
      start_group :normal, scan_until(/(?=$|#)/)
    end
  end
end
private
def scan_string( delim )
  # Scans the body of a quoted scalar whose opening quote has already been
  # consumed, up to and including the closing quote (or the end of input).
  # Double-quoted scalars also stop at backslashes so that escapes can be
  # reported separately; single-quoted ones stop only at the quote.
  regex = /(?=[#{delim=="'" ? "" : "\\\\"}#{delim}])/
  loop do
    text = scan_until( regex )
    if text.nil?
      # unterminated: everything that is left is string content
      start_group :string, scan_until( /\Z/ )
      break
    else
      start_group :string, text unless text.empty?
    end
    case peek(1)
    when "\\"
      # /m lets the escaped character be a line break, and getch covers a
      # backslash that is the very last character. Without both, nothing
      # would be consumed here and the loop would never end.
      start_group :expr, ( scan(/../m) || getch )
    else
      start_group :punct, getch
      break
    end
  end
end
end
SYNTAX["yaml"] = YAML
end