105 lines
3 KiB
Ruby
105 lines
3 KiB
Ruby
require 'syntax'
|
|
|
|
module Syntax
|
|
|
|
# A simple implementation of a YAML lexer. It handles most cases, but it is
# not a validating lexer.
|
|
class YAML < Tokenizer
|
|
|
|
# Step through a single iteration of the tokenization process. This will
# yield (potentially) many tokens, and possibly zero tokens.
#
# Two modes are used, depending on the scanner position:
#
# * At the beginning of a line, structural elements are recognized:
#   document separators (---), mapping keys, sequence dashes, blank
#   lines and comments.
# * Anywhere else, inline elements are recognized: whitespace, type tags,
#   anchors, references, times, dates, quoted strings, symbols, comments
#   and folded (">") block scalars. Anything else up to the end of the
#   line (or the next "#") is emitted as :normal text.
#
# Every path through this method consumes at least one character when
# input remains, so a caller looping until end-of-input always terminates.
# (This assumes the scanning helpers inherited from the superclass behave
# like the StringScanner methods of the same names.)
def step
  if bol?
    case
    when scan(/---(\s*.+)?$/)
      start_group :document, matched
    when scan(/(\s*)([a-zA-Z][-\w]*)(\s*):/)
      start_group :normal, subgroup(1)
      start_group :key, subgroup(2)
      start_group :normal, subgroup(3)
      start_group :punct, ":"
    when scan(/(\s*)-/)
      start_group :normal, subgroup(1)
      start_group :punct, "-"
    when scan(/\s*$/)
      blank = matched
      # A zero-length match means we are sitting directly on the "\n" of
      # an empty line. Consume that newline explicitly; otherwise nothing
      # is consumed and the next call would land here again, forever.
      blank = scan(/\n/) || blank if blank.empty?
      start_group :normal, blank
    when scan(/#.*$/)
      start_group :comment, matched
    else
      append getch
    end
  else
    case
    when scan(/[\n\r]+/)
      start_group :normal, matched
    when scan(/[ \t]+/)
      start_group :normal, matched
    when scan(/!+(.*?^)?\S+/)
      start_group :type, matched
    when scan(/&\S+/)
      start_group :anchor, matched
    when scan(/\*\S+/)
      start_group :ref, matched
    when scan(/\d\d:\d\d:\d\d/)
      start_group :time, matched
    when scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/)
      start_group :date, matched
    when scan(/['"]/)
      start_group :punct, matched
      scan_string matched
    when scan(/:\w+/)
      start_group :symbol, matched
    when scan(/[:]/)
      start_group :punct, matched
    when scan(/#.*$/)
      start_group :comment, matched
    when scan(/>-?/)
      # Folded block scalar: the indicator, the remainder of its line, and
      # then every following line that is blank or indented at least as
      # far as the first line of the block.
      start_group :punct, matched
      start_group :normal, scan(/.*$/)
      append getch until eos? || bol?
      return if eos?
      indent = check(/ */)
      start_group :string
      loop do
        # At end of input the \Z alternative below matches with zero
        # length, so the peeked line is "" rather than nil; stop here
        # instead of spinning on empty matches.
        break if eos?
        line = check_until(/[\n\r]|\Z/)
        break if line.nil?
        if line.chomp.length > 0
          this_indent = line.chomp.match(/^\s*/)[0]
          # A non-blank line indented less than the block ends the scalar.
          break if this_indent.length < indent.length
        end
        append scan_until(/[\n\r]|\Z/)
      end
    else
      # Plain text: everything up to the end of the line or the next "#".
      start_group :normal, scan_until(/(?=$|#)/)
    end
  end
end
|
|
|
|
private
|
|
|
|
# Tokenize the body of a quoted string. The opening quote has already been
# consumed by the caller.
#
# delim is the quote character that opened the string (' or "). Text is
# emitted as :string tokens and the closing quote as :punct. In
# double-quoted strings a backslash together with the character after it
# is emitted as an :expr token; in single-quoted strings the backslash has
# no special meaning. If no closing quote is found, the remaining input
# (up to the final line end) is emitted as :string.
def scan_string(delim)
  # Only double-quoted strings treat the backslash as special.
  escape = delim == "'" ? "" : "\\\\"
  stop_at = /(?=[#{escape}#{delim}])/

  loop do
    text = scan_until(stop_at)
    if text.nil?
      # Unterminated string: take whatever is left.
      start_group :string, scan_until(/\Z/)
      break
    end
    start_group :string, text unless text.empty?

    if peek(1) == "\\"
      # Consume the backslash plus the following character, newline
      # included. When the backslash is the very last character of the
      # input, consume just the backslash. Either way the scanner advances,
      # so the loop cannot stall on a dangling escape.
      start_group(:expr, scan(/\\./m) || getch)
    else
      start_group :punct, getch
      break
    end
  end
end
|
|
|
|
end
|
|
|
|
SYNTAX["yaml"] = YAML
|
|
|
|
end
|