Ruby 1.9 Compatibility
Completely removed the html5lib sanitizer. Fixed the string-handling to work in both Ruby 1.8.x and 1.9.2. There are still, inexplicably, two functional tests that fail. But the rest seems to work quite well.
This commit is contained in:
parent
79c8572053
commit
a6429f8c22
142 changed files with 519 additions and 843 deletions
248
attic/vendor/plugins/HTML5lib/lib/html5/cli.rb
vendored
Normal file
248
attic/vendor/plugins/HTML5lib/lib/html5/cli.rb
vendored
Normal file
|
@ -0,0 +1,248 @@
|
|||
$:.unshift File.dirname(__FILE__), 'lib'
|
||||
require 'html5'
|
||||
require 'ostruct'
|
||||
require 'optparse'
|
||||
|
||||
module HTML5::CLI
|
||||
|
||||
# Turn the command-line switches in +argv+ into an OpenStruct of settings.
#
# Recognised switches are removed from +argv+ in place (OptionParser#parse!),
# so only the positional arguments remain afterwards. -h/--help prints the
# usage text and exits.
#
# The returned struct carries: profile, time, output, treebuilder, error,
# encoding, parsemethod, serializer (a Hash of serializer options) and, when
# --fragment was given, container.
def self.parse_opts argv
  settings = OpenStruct.new(
    :profile     => false,
    :time        => false,
    :output      => :html,
    :treebuilder => 'simpletree',
    :error       => false,
    :encoding    => false,
    :parsemethod => :parse,
    :serializer  => {
      :encoding            => 'utf-8',
      :omit_optional_tags  => false,
      :inject_meta_charset => false
    }
  )

  cli = OptionParser.new do |o|
    # Registers a switch whose value is stored, as given, under +key+ in the
    # serializer options hash.
    serializer_switch = lambda do |switch, description, key|
      o.on(switch, description) { |value| settings.serializer[key] = value }
    end

    o.separator ""
    o.separator "Parse Options:"

    o.on("-b", "--treebuilder NAME") { |name| settings.treebuilder = name }

    o.on("-f", "--fragment CONTAINER", "Parse as a fragment") do |container|
      settings.parsemethod = :parse_fragment
      settings.container = container if container
    end

    o.separator ""
    o.separator "Filter Options:"

    serializer_switch.call("--[no-]inject-meta-charset", "inject <meta charset>", :inject_meta_charset)
    serializer_switch.call("--[no-]strip-whitespace", "strip unnecessary whitespace", :strip_whitespace)
    serializer_switch.call("--[no-]sanitize", "escape unsafe tags", :sanitize)

    o.separator ""
    o.separator "Output Options:"

    o.on("--tree", "output as debug tree") { |_| settings.output = :tree }

    # XML output is always built with the rexml tree builder.
    o.on("-x", "--xml", "output as xml") do |_|
      settings.output = :xml
      settings.treebuilder = "rexml"
    end

    # --no-html leaves no output format selected at all.
    o.on("--[no-]html", "Output as html") do |wanted|
      settings.output = wanted ? :html : nil
    end

    o.on("--hilite", "Output as formatted highlighted code.") { |_| settings.output = :hilite }

    o.on("-e", "--error", "Print a list of parse errors") { |flag| settings.error = flag }

    o.separator ""
    o.separator "Serialization Options:"

    serializer_switch.call("--[no-]omit-optional-tags", "Omit optional tags", :omit_optional_tags)
    serializer_switch.call("--[no-]quote-attr-values", "Quote attribute values", :quote_attr_values)
    serializer_switch.call("--[no-]use-best-quote-char", "Use best quote character", :use_best_quote_char)
    serializer_switch.call("--quote-char C", "Use specified quote character", :quote_char)
    serializer_switch.call("--[no-]minimize-boolean-attributes", "Minimize boolean attributes", :minimize_boolean_attributes)
    serializer_switch.call("--[no-]use-trailing-solidus", "Use trailing solidus", :use_trailing_solidus)
    serializer_switch.call("--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values", :escape_lt_in_attrs)
    serializer_switch.call("--[no-]escape-rcdata", "Escape rcdata element values", :escape_rcdata)

    o.separator ""
    o.separator "Other Options:"

    o.on("-p", "--[no-]profile", "Profile the run") { |flag| settings.profile = flag }
    o.on("-t", "--[no-]time", "Time the run") { |flag| settings.time = flag }
    o.on("-c", "--[no-]encoding", "Print character encoding used") { |flag| settings.encoding = flag }

    o.on_tail("-h", "--help", "Show this message") do
      puts o
      exit
    end
  end

  cli.parse!(argv)
  settings
end
|
||||
|
||||
# Resolve the command-line input argument +f+ into something the parser can read.
#
# * nil           - prints a usage hint on stderr and exits with status 1
# * http(s) URL   - fetched through open-uri; the open handle is returned
# * '-'           - $stdin
# * anything else - opened as a local file
#
# If the argument cannot be opened, the original string is returned
# unchanged. The failure is swallowed on purpose so that literal markup can
# be passed directly on the command line.
def self.open_input f
  unless f
    $stderr.write("No filename provided. Use -h for help\n")
    exit(1)
  end

  begin
    if f =~ %r{\Ahttps?://}
      require 'open-uri'
      f = URI.parse(f).open
    elsif f == '-'
      f = $stdin
    else
      # File.open rather than Kernel#open: the latter would execute an
      # argument of the form "|command" as a subprocess.
      f = File.open(f)
    end
  rescue StandardError
    # Deliberate best effort: fall through and hand back the raw argument.
  end

  f
end
|
||||
|
||||
# Parse the input named by the last element of +args+ according to +opts+
# (the struct produced by parse_opts) and print the result.
#
# opts.output selects the XML or HTML parser, opts.parsemethod selects
# whole-document or fragment parsing (fragments use opts.container, falling
# back to 'div'). With opts.profile the parse is profiled and the profile is
# written to stderr, with no document output. With opts.time the parse and
# print durations are reported after the output.
def self.parse(opts, args)
  f = open_input args.last

  # Inputs fetched over HTTP expose the charset declared in the Content-Type
  # header; pass it to the parser as the transport-level encoding. The block
  # makes charset return nil (instead of a default) when the header names
  # none, so the parser can still sniff the encoding itself.
  encoding = f.respond_to?(:charset) ? f.charset { nil } : nil

  require 'html5/treebuilders'
  treebuilder = HTML5::TreeBuilders[opts.treebuilder]

  if opts.output == :xml
    require 'html5/liberalxmlparser'
    parser = HTML5::XMLParser.new(:tree=>treebuilder)
  else
    require 'html5/html5parser'
    parser = HTML5::HTMLParser.new(:tree=>treebuilder)
  end

  parse_args =
    if opts.parsemethod == :parse
      [f, encoding]
    else
      [f, (opts.container || 'div'), encoding]
    end

  if opts.profile
    require 'profiler'
    Profiler__::start_profile
    parser.send(opts.parsemethod, *parse_args)
    Profiler__::stop_profile
    Profiler__::print_profile($stderr)
  elsif opts.time
    require 'time' # TODO: switch to benchmark
    t0 = Time.new
    document = parser.send(opts.parsemethod, *parse_args)
    t1 = Time.new
    print_output(parser, document, opts)
    t2 = Time.new
    puts "\n\nRun took: #{t1-t0}s (plus #{t2-t1}s to print the output)"
  else
    document = parser.send(opts.parsemethod, *parse_args)
    print_output(parser, document, opts)
  end
end
|
||||
|
||||
# Write +document+ to stdout in the format chosen by opts.output, optionally
# preceded by the detected character encoding (opts.encoding) and followed by
# the list of parse errors recorded on +parser+ (opts.error).
#
# An opts.output value other than :xml, :html, :hilite or :tree prints no
# document at all.
def self.print_output(parser, document, opts)
  if opts.encoding
    puts "Encoding: #{parser.tokenizer.stream.char_encoding}"
  end

  case opts.output
  when :xml
    print document
  when :html
    require 'html5/treewalkers'
    walker = HTML5::TreeWalkers[opts.treebuilder].new(document)
    require 'html5/serializer'
    puts HTML5::HTMLSerializer.serialize(walker, opts.serializer)
  when :hilite
    print document.hilite
  when :tree
    # A single node is treated as a one-element list of fragments.
    fragments = document.respond_to?(:each) ? document : [document]
    fragments.each do |fragment|
      puts parser.tree.testSerializer(fragment)
    end
  end

  return unless opts.error

  report = []
  parser.errors.each do |position, code, variables|
    template = HTML5::E[code] || 'Unknown error "%(errorcode)"'
    text = PythonicTemplate.new(template).to_s(variables)
    report << "Line #{position[0]} Col #{position[1]} " + text
  end
  $stdout.write("\nParse errors:\n" + report.join("\n") + "\n")
end
|
||||
|
||||
# Fills in Python-style "%(name)" placeholders in an error-message template.
#
# The template is never modified and nothing is evaluated as Ruby code:
# placeholders are substituted textually, so quotes, backslashes and "#{"
# sequences in either the template or the values are emitted literally.
class PythonicTemplate
  # Matches one "%(name)" placeholder and captures the variable name.
  PLACEHOLDER = /%\((\w+)\)/

  # format - the template string; it is only read, so shared or frozen
  #          strings (such as entries of a constant message table) are safe.
  def initialize format
    @format = format
    @vars = {}
  end

  # Render the template.
  #
  # vars - optional Hash (or list of [name, value] pairs) of placeholder
  #        values; names may be Strings or Symbols. Values are remembered on
  #        the instance, so a later call without +vars+ reuses them.
  #
  # Returns the rendered String. Placeholders with no value render as an
  # empty string; values are converted with to_s.
  def to_s(vars=nil)
    vars.each { |var, value| @vars[var.to_s] = value } if vars
    @format.gsub(PLACEHOLDER) { @vars[Regexp.last_match(1)].to_s }
  end
end
|
||||
|
||||
# Command-line entry point: interpret the switches in ARGV, then parse the
# input named by what is left of ARGV and print the result.
def self.run
  # parse_opts strips the switches it recognises out of ARGV, so it has to
  # finish before ARGV is handed on as the positional-argument list.
  settings = parse_opts(ARGV)
  parse(settings, ARGV)
end
|
||||
end
|
1047
attic/vendor/plugins/HTML5lib/lib/html5/constants.rb
vendored
Executable file
1047
attic/vendor/plugins/HTML5lib/lib/html5/constants.rb
vendored
Executable file
File diff suppressed because it is too large
Load diff
10
attic/vendor/plugins/HTML5lib/lib/html5/filters/base.rb
vendored
Normal file
10
attic/vendor/plugins/HTML5lib/lib/html5/filters/base.rb
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
require 'delegate'
|
||||
require 'enumerator'
|
||||
|
||||
module HTML5
  module Filters
    # Common superclass for token-stream filters.
    #
    # A filter wraps a source object via SimpleDelegator, so any message the
    # filter does not define itself is forwarded to the wrapped source. On top
    # of that it mixes in Enumerable, whose methods are driven by +each+:
    # either the subclass's own +each+ or, failing that, the wrapped source's.
    class Base < SimpleDelegator
      include Enumerable
    end
  end
end
|
82
attic/vendor/plugins/HTML5lib/lib/html5/filters/inject_meta_charset.rb
vendored
Normal file
82
attic/vendor/plugins/HTML5lib/lib/html5/filters/inject_meta_charset.rb
vendored
Normal file
|
@ -0,0 +1,82 @@
|
|||
require 'html5/filters/base'
|
||||
|
||||
module HTML5
  module Filters
    # Token-stream filter that makes the serialized document declare the
    # given encoding: an existing <meta charset> or
    # <meta http-equiv="content-type" content=...> is rewritten, and if the
    # head contains neither, a <meta charset> is inserted right after the
    # head start tag.
    #
    # When the encoding is nil nothing is inserted (the declaration is
    # treated as already present).
    class InjectMetaCharset < Base
      # source   - the token stream to wrap (anything responding to each).
      # encoding - the encoding name to declare, or nil to insert nothing.
      def initialize(source, encoding)
        super(source)
        @encoding = encoding
      end

      # Yield every token of the wrapped stream, with the charset declaration
      # rewritten or inserted as described on the class.
      #
      # Tokens inside <head> are buffered until the head ends, because only
      # then is it known whether a <meta> has to be inserted after the head
      # start tag.
      def each
        state = :pre_head
        meta_found = @encoding.nil?
        pending = []

        __getobj__.each do |token|
          case token[:type]
          when :StartTag
            state = :in_head if token[:name].downcase == "head"

          when :EmptyTag
            if token[:name].downcase == "meta"
              meta_found = true if declare_encoding_in(token, meta_found)

            elsif token[:name].downcase == "head" && !meta_found
              # insert meta into empty head
              yield :type => :StartTag, :name => "head", :data => token[:data]
              yield charset_meta_token
              yield :type => :EndTag, :name => "head"
              meta_found = true
              next
            end

          when :EndTag
            if token[:name].downcase == "head" && pending.any?
              # insert meta into head (if necessary) and flush pending queue
              yield pending.shift
              yield charset_meta_token unless meta_found
              yield pending.shift while pending.any?
              meta_found = true
              state = :post_head
            end
          end

          if state == :in_head
            pending << token
          else
            yield token
          end
        end

        # The stream ended while the head was still open: emit the buffered
        # tokens (with the meta, if still needed) instead of dropping them.
        unless pending.empty?
          yield pending.shift
          yield charset_meta_token unless meta_found
          yield pending.shift while pending.any?
        end
      end

      private

      # The <meta charset=...> token that gets inserted into the head.
      def charset_meta_token
        { :type => :EmptyTag, :name => "meta", :data => [["charset", @encoding]] }
      end

      # Rewrite the attributes of a <meta> token so that it declares
      # @encoding.
      #
      # A charset attribute (matched case-insensitively) is always replaced.
      # A content attribute is only rewritten when the token also carries
      # http-equiv="content-type" and no declaration has been seen so far
      # (+meta_found+ is false).
      #
      # Returns true when the token now carries the declaration.
      def declare_encoding_in(token, meta_found)
        has_http_equiv_content_type = false
        content_index = -1

        token[:data].each_with_index do |(name, value), i|
          if name.downcase == 'charset'
            token[:data][i] = ['charset', @encoding]
            return true
          elsif name == 'http-equiv' && value.downcase == 'content-type'
            has_http_equiv_content_type = true
          elsif name == 'content'
            content_index = i
          end
        end

        return false if meta_found
        return false unless has_http_equiv_content_type && content_index >= 0

        token[:data][content_index][1] = 'text/html; charset=%s' % @encoding
        true
      end
    end
  end
end
|
752
attic/vendor/plugins/HTML5lib/lib/html5/filters/iso639codes.rb
vendored
Executable file
752
attic/vendor/plugins/HTML5lib/lib/html5/filters/iso639codes.rb
vendored
Executable file
|
@ -0,0 +1,752 @@
|
|||
# borrowed from feedvalidator, original copyright license is
|
||||
#
|
||||
# Copyright (c) 2002-2006, Sam Ruby, Mark Pilgrim, Joseph Walton, and Phil Ringnalda
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
ISO_LANG = {
|
||||
'aa' => 'Afar',
|
||||
'ab' => 'Abkhazian',
|
||||
'ae' => 'Avestan',
|
||||
'af' => 'Afrikaans',
|
||||
'ak' => 'Akan',
|
||||
'am' => 'Amharic',
|
||||
'an' => 'Aragonese',
|
||||
'ar' => 'Arabic',
|
||||
'as' => 'Assamese',
|
||||
'av' => 'Avaric',
|
||||
'ay' => 'Aymara',
|
||||
'az' => 'Azerbaijani',
|
||||
'ba' => 'Bashkir',
|
||||
'be' => 'Byelorussian',
|
||||
'bg' => 'Bulgarian',
|
||||
'bh' => 'Bihari',
|
||||
'bi' => 'Bislama',
|
||||
'bm' => 'Bambara',
|
||||
'bn' => 'Bengali;Bangla',
|
||||
'bo' => 'Tibetan',
|
||||
'br' => 'Breton',
|
||||
'bs' => 'Bosnian',
|
||||
'ca' => 'Catalan',
|
||||
'ce' => 'Chechen',
|
||||
'ch' => 'Chamorro',
|
||||
'co' => 'Corsican',
|
||||
'cr' => 'Cree',
|
||||
'cs' => 'Czech',
|
||||
'cu' => 'Church Slavic',
|
||||
'cv' => 'Chuvash',
|
||||
'cy' => 'Welsh',
|
||||
'da' => 'Danish',
|
||||
'de' => 'German',
|
||||
'dv' => 'Divehi',
|
||||
'dz' => 'Dzongkha',
|
||||
'ee' => 'Ewe',
|
||||
'el' => 'Greek',
|
||||
'en' => 'English',
|
||||
'eo' => 'Esperanto',
|
||||
'es' => 'Spanish',
|
||||
'et' => 'Estonian',
|
||||
'eu' => 'Basque',
|
||||
'fa' => 'Persian (Farsi)',
|
||||
'ff' => 'Fulah',
|
||||
'fi' => 'Finnish',
|
||||
'fj' => 'Fiji',
|
||||
'fo' => 'Faroese',
|
||||
'fr' => 'French',
|
||||
'fy' => 'Frisian, Western',
|
||||
'ga' => 'Irish',
|
||||
'gd' => 'Scots Gaelic',
|
||||
'gl' => 'Galician',
|
||||
'gn' => 'Guarani',
|
||||
'gu' => 'Gujarati',
|
||||
'gv' => 'Manx',
|
||||
'ha' => 'Hausa',
|
||||
'he' => 'Hebrew',
|
||||
'hi' => 'Hindi',
|
||||
'ho' => 'Hiri Motu',
|
||||
'hr' => 'Croatian',
|
||||
'ht' => 'Haitian',
|
||||
'hu' => 'Hungarian',
|
||||
'hy' => 'Armenian',
|
||||
'hz' => 'Herero',
|
||||
'ia' => 'Interlingua',
|
||||
'id' => 'Indonesian',
|
||||
'ie' => 'Interlingue',
|
||||
'ig' => 'Igbo',
|
||||
'ii' => 'Sichuan Yi',
|
||||
'ik' => 'Inupiak',
|
||||
'io' => 'Ido',
|
||||
'is' => 'Icelandic',
|
||||
'it' => 'Italian',
|
||||
'iu' => 'Inuktitut',
|
||||
'ja' => 'Japanese',
|
||||
'jv' => 'Javanese',
|
||||
'ka' => 'Georgian',
|
||||
'kg' => 'Kongo',
|
||||
'ki' => 'Kikuyu; Gikuyu',
|
||||
'kj' => 'Kuanyama; Kwanyama',
|
||||
'kk' => 'Kazakh',
|
||||
'kl' => 'Greenlandic',
|
||||
'km' => 'Cambodian',
|
||||
'kn' => 'Kannada',
|
||||
'ko' => 'Korean',
|
||||
'kr' => 'Kanuri',
|
||||
'ks' => 'Kashmiri',
|
||||
'ku' => 'Kurdish',
|
||||
'kv' => 'Komi',
|
||||
'kw' => 'Cornish',
|
||||
'ky' => 'Kirghiz',
|
||||
'la' => 'Latin',
|
||||
'lb' => 'Letzeburgesch; Luxembourgish',
|
||||
'lg' => 'Ganda',
|
||||
'li' => 'Limburgan; Limburger, Limburgish',
|
||||
'ln' => 'Lingala',
|
||||
'lo' => 'Lao',
|
||||
'lt' => 'Lithuanian',
|
||||
'lu' => 'Luba-Katanga',
|
||||
'lv' => 'Latvian',
|
||||
'mg' => 'Malagasy',
|
||||
'mh' => 'Marshallese',
|
||||
'mi' => 'Maori',
|
||||
'mk' => 'Macedonian',
|
||||
'ml' => 'Malayalam',
|
||||
'mn' => 'Mongolian',
|
||||
'mo' => 'Moldavian',
|
||||
'mr' => 'Marathi',
|
||||
'ms' => 'Malay',
|
||||
'mt' => 'Maltese',
|
||||
'my' => 'Burmese',
|
||||
'na' => 'Nauru',
|
||||
'nb' => 'Norwegian Bokmal',
|
||||
'nd' => 'Ndebele, North',
|
||||
'ne' => 'Nepali',
|
||||
'ng' => 'Ndonga',
|
||||
'nl' => 'Dutch',
|
||||
'nn' => 'Norwegian Nynorsk',
|
||||
'no' => 'Norwegian',
|
||||
'nr' => 'Ndebele, South',
|
||||
'nv' => 'Navaho; Navajo',
|
||||
'ny' => 'Chewa; Chichewa; Nyanha',
|
||||
'oc' => 'Occitan',
|
||||
'oj' => 'Ojibwa',
|
||||
'om' => 'Afan (Oromo)',
|
||||
'or' => 'Oriya',
|
||||
'os' => 'Ossetian; Ossetic',
|
||||
'pa' => 'Punjabi',
|
||||
'pi' => 'Pali',
|
||||
'pl' => 'Polish',
|
||||
'ps' => 'Pushto',
|
||||
'pt' => 'Portuguese',
|
||||
'qu' => 'Quechua',
|
||||
'rm' => 'Rhaeto-Romance',
|
||||
'rn' => 'Kurundi',
|
||||
'ro' => 'Romanian',
|
||||
'ru' => 'Russian',
|
||||
'rw' => 'Kinyarwanda',
|
||||
'sa' => 'Sanskrit',
|
||||
'sc' => 'Sardinian',
|
||||
'sd' => 'Sindhi',
|
||||
'se' => 'Northern Sami',
|
||||
'sg' => 'Sangho',
|
||||
'sh' => 'Serbo-Croatian',
|
||||
'si' => 'Singhalese',
|
||||
'sk' => 'Slovak',
|
||||
'sl' => 'Slovenian',
|
||||
'sm' => 'Samoan',
|
||||
'sn' => 'Shona',
|
||||
'so' => 'Somali',
|
||||
'sq' => 'Albanian',
|
||||
'sr' => 'Serbian',
|
||||
'ss' => 'Swati',
|
||||
'st' => 'Sotho, Southern',
|
||||
'su' => 'Sundanese',
|
||||
'sv' => 'Swedish',
|
||||
'sw' => 'Swahili',
|
||||
'ta' => 'Tamil',
|
||||
'te' => 'Telugu',
|
||||
'tg' => 'Tajik',
|
||||
'th' => 'Thai',
|
||||
'ti' => 'Tigrinya',
|
||||
'tk' => 'Turkmen',
|
||||
'tl' => 'Tagalog',
|
||||
'tn' => 'Tswana',
|
||||
'to' => 'Tonga',
|
||||
'tr' => 'Turkish',
|
||||
'ts' => 'Tsonga',
|
||||
'tt' => 'Tatar',
|
||||
'tw' => 'Twi',
|
||||
'ty' => 'Tahitian',
|
||||
'ug' => 'Uigur',
|
||||
'uk' => 'Ukrainian',
|
||||
'ur' => 'Urdu',
|
||||
'uz' => 'Uzbek',
|
||||
've' => 'Venda',
|
||||
'vi' => 'Vietnamese',
|
||||
'vo' => 'Volapuk',
|
||||
'wa' => 'Walloon',
|
||||
'wo' => 'Wolof',
|
||||
'xh' => 'Xhosa',
|
||||
'yi' => 'Yiddish',
|
||||
'yo' => 'Yoruba',
|
||||
'za' => 'Zhuang',
|
||||
'zh' => 'Chinese',
|
||||
'zu' => 'Zulu',
|
||||
'x' => 'a user-defined language',
|
||||
'xx' => 'a user-defined language',
|
||||
|
||||
'abk' => 'Abkhazian',
|
||||
'ace' => 'Achinese',
|
||||
'ach' => 'Acoli',
|
||||
'ada' => 'Adangme',
|
||||
'ady' => 'Adygei',
|
||||
'ady' => 'Adyghe',
|
||||
'aar' => 'Afar',
|
||||
'afh' => 'Afrihili',
|
||||
'afr' => 'Afrikaans',
|
||||
'afa' => 'Afro-Asiatic (Other)',
|
||||
'ain' => 'Ainu',
|
||||
'aka' => 'Akan',
|
||||
'akk' => 'Akkadian',
|
||||
'alb' => 'Albanian',
|
||||
'sqi' => 'Albanian',
|
||||
'gws' => 'Alemanic',
|
||||
'ale' => 'Aleut',
|
||||
'alg' => 'Algonquian languages',
|
||||
'tut' => 'Altaic (Other)',
|
||||
'amh' => 'Amharic',
|
||||
'anp' => 'Angika',
|
||||
'apa' => 'Apache languages',
|
||||
'ara' => 'Arabic',
|
||||
'arg' => 'Aragonese',
|
||||
'arc' => 'Aramaic',
|
||||
'arp' => 'Arapaho',
|
||||
'arn' => 'Araucanian',
|
||||
'arw' => 'Arawak',
|
||||
'arm' => 'Armenian',
|
||||
'hye' => 'Armenian',
|
||||
'rup' => 'Aromanian',
|
||||
'art' => 'Artificial (Other)',
|
||||
'asm' => 'Assamese',
|
||||
'ast' => 'Asturian',
|
||||
'ath' => 'Athapascan languages',
|
||||
'aus' => 'Australian languages',
|
||||
'map' => 'Austronesian (Other)',
|
||||
'ava' => 'Avaric',
|
||||
'ave' => 'Avestan',
|
||||
'awa' => 'Awadhi',
|
||||
'aym' => 'Aymara',
|
||||
'aze' => 'Azerbaijani',
|
||||
'ast' => 'Bable',
|
||||
'ban' => 'Balinese',
|
||||
'bat' => 'Baltic (Other)',
|
||||
'bal' => 'Baluchi',
|
||||
'bam' => 'Bambara',
|
||||
'bai' => 'Bamileke languages',
|
||||
'bad' => 'Banda',
|
||||
'bnt' => 'Bantu (Other)',
|
||||
'bas' => 'Basa',
|
||||
'bak' => 'Bashkir',
|
||||
'baq' => 'Basque',
|
||||
'eus' => 'Basque',
|
||||
'btk' => 'Batak (Indonesia)',
|
||||
'bej' => 'Beja',
|
||||
'bel' => 'Belarusian',
|
||||
'bem' => 'Bemba',
|
||||
'ben' => 'Bengali',
|
||||
'ber' => 'Berber (Other)',
|
||||
'bho' => 'Bhojpuri',
|
||||
'bih' => 'Bihari',
|
||||
'bik' => 'Bikol',
|
||||
'byn' => 'Bilin',
|
||||
'bin' => 'Bini',
|
||||
'bis' => 'Bislama',
|
||||
'byn' => 'Blin',
|
||||
'nob' => 'Bokmal, Norwegian',
|
||||
'bos' => 'Bosnian',
|
||||
'bra' => 'Braj',
|
||||
'bre' => 'Breton',
|
||||
'bug' => 'Buginese',
|
||||
'bul' => 'Bulgarian',
|
||||
'bua' => 'Buriat',
|
||||
'bur' => 'Burmese',
|
||||
'mya' => 'Burmese',
|
||||
'cad' => 'Caddo',
|
||||
'car' => 'Carib',
|
||||
'spa' => 'Castilian',
|
||||
'cat' => 'Catalan',
|
||||
'cau' => 'Caucasian (Other)',
|
||||
'ceb' => 'Cebuano',
|
||||
'cel' => 'Celtic (Other)',
|
||||
'cai' => 'Central American Indian (Other)',
|
||||
'chg' => 'Chagatai',
|
||||
'cmc' => 'Chamic languages',
|
||||
'cha' => 'Chamorro',
|
||||
'che' => 'Chechen',
|
||||
'chr' => 'Cherokee',
|
||||
'nya' => 'Chewa',
|
||||
'chy' => 'Cheyenne',
|
||||
'chb' => 'Chibcha',
|
||||
'nya' => 'Chichewa',
|
||||
'chi' => 'Chinese',
|
||||
'zho' => 'Chinese',
|
||||
'chn' => 'Chinook jargon',
|
||||
'chp' => 'Chipewyan',
|
||||
'cho' => 'Choctaw',
|
||||
'zha' => 'Chuang',
|
||||
'chu' => 'Church Slavic; Church Slavonic; Old Church Slavonic; Old Church Slavic; Old Bulgarian',
|
||||
'chk' => 'Chuukese',
|
||||
'chv' => 'Chuvash',
|
||||
'nwc' => 'Classical Nepal Bhasa; Classical Newari; Old Newari',
|
||||
'cop' => 'Coptic',
|
||||
'cor' => 'Cornish',
|
||||
'cos' => 'Corsican',
|
||||
'cre' => 'Cree',
|
||||
'mus' => 'Creek',
|
||||
'crp' => 'Creoles and pidgins(Other)',
|
||||
'cpe' => 'Creoles and pidgins, English-based (Other)',
|
||||
'cpf' => 'Creoles and pidgins, French-based (Other)',
|
||||
'cpp' => 'Creoles and pidgins, Portuguese-based (Other)',
|
||||
'crh' => 'Crimean Tatar; Crimean Turkish',
|
||||
'scr' => 'Croatian',
|
||||
'hrv' => 'Croatian',
|
||||
'cus' => 'Cushitic (Other)',
|
||||
'cze' => 'Czech',
|
||||
'ces' => 'Czech',
|
||||
'dak' => 'Dakota',
|
||||
'dan' => 'Danish',
|
||||
'dar' => 'Dargwa',
|
||||
'day' => 'Dayak',
|
||||
'del' => 'Delaware',
|
||||
'din' => 'Dinka',
|
||||
'div' => 'Divehi',
|
||||
'doi' => 'Dogri',
|
||||
'dgr' => 'Dogrib',
|
||||
'dra' => 'Dravidian (Other)',
|
||||
'dua' => 'Duala',
|
||||
'dut' => 'Dutch',
|
||||
'nld' => 'Dutch',
|
||||
'dum' => 'Dutch, Middle (ca. 1050-1350)',
|
||||
'dyu' => 'Dyula',
|
||||
'dzo' => 'Dzongkha',
|
||||
'efi' => 'Efik',
|
||||
'egy' => 'Egyptian (Ancient)',
|
||||
'eka' => 'Ekajuk',
|
||||
'elx' => 'Elamite',
|
||||
'eng' => 'English',
|
||||
'enm' => 'English, Middle (1100-1500)',
|
||||
'ang' => 'English, Old (ca.450-1100)',
|
||||
'myv' => 'Erzya',
|
||||
'epo' => 'Esperanto',
|
||||
'est' => 'Estonian',
|
||||
'ewe' => 'Ewe',
|
||||
'ewo' => 'Ewondo',
|
||||
'fan' => 'Fang',
|
||||
'fat' => 'Fanti',
|
||||
'fao' => 'Faroese',
|
||||
'fij' => 'Fijian',
|
||||
'fil' => 'Filipino; Pilipino',
|
||||
'fin' => 'Finnish',
|
||||
'fiu' => 'Finno-Ugrian (Other)',
|
||||
'fon' => 'Fon',
|
||||
'fre' => 'French',
|
||||
'fra' => 'French',
|
||||
'frm' => 'French, Middle (ca.1400-1600)',
|
||||
'fro' => 'French, Old (842-ca.1400)',
|
||||
'frs' => 'Frisian, Eastern',
|
||||
'fry' => 'Frisian, Western',
|
||||
'fur' => 'Friulian',
|
||||
'ful' => 'Fulah',
|
||||
'gaa' => 'Ga',
|
||||
'gla' => 'Gaelic',
|
||||
'glg' => 'Gallegan',
|
||||
'lug' => 'Ganda',
|
||||
'gay' => 'Gayo',
|
||||
'gba' => 'Gbaya',
|
||||
'gez' => 'Geez',
|
||||
'geo' => 'Georgian',
|
||||
'kat' => 'Georgian',
|
||||
'ger' => 'German',
|
||||
'deu' => 'German',
|
||||
'nds' => 'German, Low',
|
||||
'gmh' => 'German, Middle High (ca.1050-1500)',
|
||||
'goh' => 'German, Old High (ca.750-1050)',
|
||||
'gem' => 'Germanic (Other)',
|
||||
'kik' => 'Gikuyu',
|
||||
'gil' => 'Gilbertese',
|
||||
'gon' => 'Gondi',
|
||||
'gor' => 'Gorontalo',
|
||||
'got' => 'Gothic',
|
||||
'grb' => 'Grebo',
|
||||
'grc' => 'Greek, Ancient (to 1453)',
|
||||
'gre' => 'Greek, Modern (1453-)',
|
||||
'ell' => 'Greek, Modern (1453-)',
|
||||
'kal' => 'Greenlandic; Kalaallisut',
|
||||
'grn' => 'Guarani',
|
||||
'guj' => 'Gujarati',
|
||||
'gwi' => 'Gwich\'in',
|
||||
'hai' => 'Haida',
|
||||
'hat' => 'Haitian',
|
||||
'hau' => 'Hausa',
|
||||
'haw' => 'Hawaiian',
|
||||
'heb' => 'Hebrew',
|
||||
'her' => 'Herero',
|
||||
'hil' => 'Hiligaynon',
|
||||
'him' => 'Himachali',
|
||||
'hin' => 'Hindi',
|
||||
'hmo' => 'Hiri Motu',
|
||||
'hit' => 'Hittite',
|
||||
'hmn' => 'Hmong',
|
||||
'hun' => 'Hungarian',
|
||||
'hup' => 'Hupa',
|
||||
'iba' => 'Iban',
|
||||
'ice' => 'Icelandic',
|
||||
'isl' => 'Icelandic',
|
||||
'ido' => 'Ido',
|
||||
'ibo' => 'Igbo',
|
||||
'ijo' => 'Ijo',
|
||||
'ilo' => 'Iloko',
|
||||
'smn' => 'Inari Sami',
|
||||
'inc' => 'Indic (Other)',
|
||||
'ine' => 'Indo-European (Other)',
|
||||
'ind' => 'Indonesian',
|
||||
'inh' => 'Ingush',
|
||||
'ina' => 'Interlingua (International Auxiliary Language Association)',
|
||||
'ile' => 'Interlingue',
|
||||
'iku' => 'Inuktitut',
|
||||
'ipk' => 'Inupiaq',
|
||||
'ira' => 'Iranian (Other)',
|
||||
'gle' => 'Irish',
|
||||
'mga' => 'Irish, Middle (900-1200)',
|
||||
'sga' => 'Irish, Old (to 900)',
|
||||
'iro' => 'Iroquoian languages',
|
||||
'ita' => 'Italian',
|
||||
'jpn' => 'Japanese',
|
||||
'jav' => 'Javanese',
|
||||
'jrb' => 'Judeo-Arabic',
|
||||
'jpr' => 'Judeo-Persian',
|
||||
'kbd' => 'Kabardian',
|
||||
'kab' => 'Kabyle',
|
||||
'kac' => 'Kachin',
|
||||
'kal' => 'Kalaallisut',
|
||||
'xal' => 'Kalmyk',
|
||||
'kam' => 'Kamba',
|
||||
'kan' => 'Kannada',
|
||||
'kau' => 'Kanuri',
|
||||
'krc' => 'Karachay-Balkar',
|
||||
'kaa' => 'Kara-Kalpak',
|
||||
'krl' => 'Karelian',
|
||||
'kar' => 'Karen',
|
||||
'kas' => 'Kashmiri',
|
||||
'csb' => 'Kashubian',
|
||||
'kaw' => 'Kawi',
|
||||
'kaz' => 'Kazakh',
|
||||
'kha' => 'Khasi',
|
||||
'khm' => 'Khmer',
|
||||
'khi' => 'Khoisan (Other)',
|
||||
'kho' => 'Khotanese',
|
||||
'kik' => 'Kikuyu',
|
||||
'kmb' => 'Kimbundu',
|
||||
'kin' => 'Kinyarwanda',
|
||||
'kir' => 'Kirghiz',
|
||||
'tlh' => 'Klingon; tlhIngan-Hol',
|
||||
'kom' => 'Komi',
|
||||
'kon' => 'Kongo',
|
||||
'kok' => 'Konkani',
|
||||
'kor' => 'Korean',
|
||||
'kos' => 'Kosraean',
|
||||
'kpe' => 'Kpelle',
|
||||
'kro' => 'Kru',
|
||||
'kua' => 'Kuanyama',
|
||||
'kum' => 'Kumyk',
|
||||
'kur' => 'Kurdish',
|
||||
'kru' => 'Kurukh',
|
||||
'kut' => 'Kutenai',
|
||||
'kua' => 'Kwanyama',
|
||||
'lad' => 'Ladino',
|
||||
'lah' => 'Lahnda',
|
||||
'lam' => 'Lamba',
|
||||
'lao' => 'Lao',
|
||||
'lat' => 'Latin',
|
||||
'lav' => 'Latvian',
|
||||
'ltz' => 'Letzeburgesch',
|
||||
'lez' => 'Lezghian',
|
||||
'lim' => 'Limburgan',
|
||||
'lin' => 'Lingala',
|
||||
'lit' => 'Lithuanian',
|
||||
'jbo' => 'Lojban',
|
||||
'nds' => 'Low German',
|
||||
'dsb' => 'Lower Sorbian',
|
||||
'loz' => 'Lozi',
|
||||
'lub' => 'Luba-Katanga',
|
||||
'lua' => 'Luba-Lulua',
|
||||
'lui' => 'Luiseno',
|
||||
'smj' => 'Lule Sami',
|
||||
'lun' => 'Lunda',
|
||||
'luo' => 'Luo (Kenya and Tanzania)',
|
||||
'lus' => 'Lushai',
|
||||
'ltz' => 'Luxembourgish',
|
||||
'mac' => 'Macedonian',
|
||||
'mkd' => 'Macedonian',
|
||||
'mad' => 'Madurese',
|
||||
'mag' => 'Magahi',
|
||||
'mai' => 'Maithili',
|
||||
'mak' => 'Makasar',
|
||||
'mlg' => 'Malagasy',
|
||||
'may' => 'Malay',
|
||||
'msa' => 'Malay',
|
||||
'mal' => 'Malayalam',
|
||||
'mlt' => 'Maltese',
|
||||
'mnc' => 'Manchu',
|
||||
'mdr' => 'Mandar',
|
||||
'man' => 'Mandingo',
|
||||
'mni' => 'Manipuri',
|
||||
'mno' => 'Manobo languages',
|
||||
'glv' => 'Manx',
|
||||
'mao' => 'Maori',
|
||||
'mri' => 'Maori',
|
||||
'mar' => 'Marathi',
|
||||
'chm' => 'Mari',
|
||||
'mah' => 'Marshallese',
|
||||
'mwr' => 'Marwari',
|
||||
'mas' => 'Masai',
|
||||
'myn' => 'Mayan languages',
|
||||
'men' => 'Mende',
|
||||
'mic' => 'Micmac',
|
||||
'min' => 'Minangkabau',
|
||||
'mwl' => 'Mirandese',
|
||||
'mis' => 'Miscellaneous languages',
|
||||
'moh' => 'Mohawk',
|
||||
'mdf' => 'Moksha',
|
||||
'mol' => 'Moldavian',
|
||||
'mkh' => 'Mon-Khmer (Other)',
|
||||
'lol' => 'Mongo',
|
||||
'mon' => 'Mongolian',
|
||||
'mos' => 'Mossi',
|
||||
'mul' => 'Multiple languages',
|
||||
'mun' => 'Munda languages',
|
||||
'nah' => 'Nahuatl',
|
||||
'nau' => 'Nauru',
|
||||
'nav' => 'Navaho; Navajo',
|
||||
'nde' => 'Ndebele, North',
|
||||
'nbl' => 'Ndebele, South',
|
||||
'ndo' => 'Ndonga',
|
||||
'nap' => 'Neapolitan',
|
||||
'nep' => 'Nepali',
|
||||
'new' => 'Newari',
|
||||
'nia' => 'Nias',
|
||||
'nic' => 'Niger-Kordofanian (Other)',
|
||||
'ssa' => 'Nilo-Saharan (Other)',
|
||||
'niu' => 'Niuean',
|
||||
'nog' => 'Nogai',
|
||||
'non' => 'Norse, Old',
|
||||
'nai' => 'North American Indian (Other)',
|
||||
'frr' => 'Northern Frisian',
|
||||
'sme' => 'Northern Sami',
|
||||
'nso' => 'Northern Sotho; Pedi; Sepedi',
|
||||
'nde' => 'North Ndebele',
|
||||
'nor' => 'Norwegian',
|
||||
'nob' => 'Norwegian Bokmal',
|
||||
'nno' => 'Norwegian Nynorsk',
|
||||
'nub' => 'Nubian languages',
|
||||
'nym' => 'Nyamwezi',
|
||||
'nya' => 'Nyanja',
|
||||
'nyn' => 'Nyankole',
|
||||
'nno' => 'Nynorsk, Norwegian',
|
||||
'nyo' => 'Nyoro',
|
||||
'nzi' => 'Nzima',
|
||||
'oci' => 'Occitan (post 1500)',
|
||||
'oji' => 'Ojibwa',
|
||||
'ori' => 'Oriya',
|
||||
'orm' => 'Oromo',
|
||||
'osa' => 'Osage',
|
||||
'oss' => 'Ossetian; Ossetic',
|
||||
'oto' => 'Otomian languages',
|
||||
'pal' => 'Pahlavi',
|
||||
'pau' => 'Palauan',
|
||||
'pli' => 'Pali',
|
||||
'pam' => 'Pampanga',
|
||||
'pag' => 'Pangasinan',
|
||||
'pan' => 'Panjabi',
|
||||
'pap' => 'Papiamento',
|
||||
'paa' => 'Papuan (Other)',
|
||||
'per' => 'Persian',
|
||||
'fas' => 'Persian',
|
||||
'peo' => 'Persian, Old (ca.600-400)',
|
||||
'phi' => 'Philippine (Other)',
|
||||
'phn' => 'Phoenician',
|
||||
'pon' => 'Pohnpeian',
|
||||
'pol' => 'Polish',
|
||||
'por' => 'Portuguese',
|
||||
'pra' => 'Prakrit languages',
|
||||
'oci' => 'Provencal',
|
||||
'pro' => 'Provencal, Old (to 1500)',
|
||||
'pan' => 'Punjabi',
|
||||
'pus' => 'Pushto',
|
||||
'que' => 'Quechua',
|
||||
'roh' => 'Raeto-Romance',
|
||||
'raj' => 'Rajasthani',
|
||||
'rap' => 'Rapanui',
|
||||
'rar' => 'Rarotongan',
|
||||
'qaa' => 'Reserved for local use',
|
||||
'qtz' => 'Reserved for local use',
|
||||
'roa' => 'Romance (Other)',
|
||||
'rum' => 'Romanian',
|
||||
'ron' => 'Romanian',
|
||||
'rom' => 'Romany',
|
||||
'run' => 'Rundi',
|
||||
'rus' => 'Russian',
|
||||
'sal' => 'Salishan languages',
|
||||
'sam' => 'Samaritan Aramaic',
|
||||
'smi' => 'Sami languages (Other)',
|
||||
'smo' => 'Samoan',
|
||||
'sad' => 'Sandawe',
|
||||
'sag' => 'Sango',
|
||||
'san' => 'Sanskrit',
|
||||
'sat' => 'Santali',
|
||||
'srd' => 'Sardinian',
|
||||
'sas' => 'Sasak',
|
||||
'nds' => 'Saxon, Low',
|
||||
'sco' => 'Scots',
|
||||
'gla' => 'Scottish Gaelic',
|
||||
'sel' => 'Selkup',
|
||||
'sem' => 'Semitic (Other)',
|
||||
'nso' => 'Sepedi; Northern Sotho; Pedi',
|
||||
'scc' => 'Serbian',
|
||||
'srp' => 'Serbian',
|
||||
'srr' => 'Serer',
|
||||
'shn' => 'Shan',
|
||||
'sna' => 'Shona',
|
||||
'iii' => 'Sichuan Yi',
|
||||
'scn' => 'Sicilian',
|
||||
'sid' => 'Sidamo',
|
||||
'sgn' => 'Sign languages',
|
||||
'bla' => 'Siksika',
|
||||
'snd' => 'Sindhi',
|
||||
'sin' => 'Sinhalese',
|
||||
'sit' => 'Sino-Tibetan (Other)',
|
||||
'sio' => 'Siouan languages',
|
||||
'sms' => 'Skolt Sami',
|
||||
'den' => 'Slave (Athapascan)',
|
||||
'sla' => 'Slavic (Other)',
|
||||
'slo' => 'Slovak',
|
||||
'slk' => 'Slovak',
|
||||
'slv' => 'Slovenian',
|
||||
'sog' => 'Sogdian',
|
||||
'som' => 'Somali',
|
||||
'son' => 'Songhai',
|
||||
'snk' => 'Soninke',
|
||||
'wen' => 'Sorbian languages',
|
||||
'nso' => 'Sotho, Northern',
|
||||
'sot' => 'Sotho, Southern',
|
||||
'sai' => 'South American Indian (Other)',
|
||||
'alt' => 'Southern Altai',
|
||||
'sma' => 'Southern Sami',
|
||||
'nbl' => 'South Ndebele',
|
||||
'spa' => 'Spanish',
|
||||
'srn' => 'Sranan Tongo',
|
||||
'suk' => 'Sukuma',
|
||||
'sux' => 'Sumerian',
|
||||
'sun' => 'Sundanese',
|
||||
'sus' => 'Susu',
|
||||
'swa' => 'Swahili',
|
||||
'ssw' => 'Swati',
|
||||
'swe' => 'Swedish',
|
||||
'gsw' => 'Swiss German; Alemanic',
|
||||
'syr' => 'Syriac',
|
||||
'tgl' => 'Tagalog',
|
||||
'tah' => 'Tahitian',
|
||||
'tai' => 'Tai (Other)',
|
||||
'tgk' => 'Tajik',
|
||||
'tmh' => 'Tamashek',
|
||||
'tam' => 'Tamil',
|
||||
'tat' => 'Tatar',
|
||||
'tel' => 'Telugu',
|
||||
'ter' => 'Tereno',
|
||||
'tet' => 'Tetum',
|
||||
'tha' => 'Thai',
|
||||
'tib' => 'Tibetan',
|
||||
'bod' => 'Tibetan',
|
||||
'tig' => 'Tigre',
|
||||
'tir' => 'Tigrinya',
|
||||
'tem' => 'Timne',
|
||||
'tiv' => 'Tiv',
|
||||
'tlh' => 'tlhIngan-Hol; Klingon',
|
||||
'tli' => 'Tlingit',
|
||||
'tpi' => 'Tok Pisin',
|
||||
'tkl' => 'Tokelau',
|
||||
'tog' => 'Tonga (Nyasa)',
|
||||
'ton' => 'Tonga (Tonga Islands)',
|
||||
'tsi' => 'Tsimshian',
|
||||
'tso' => 'Tsonga',
|
||||
'tsn' => 'Tswana',
|
||||
'tum' => 'Tumbuka',
|
||||
'tup' => 'Tupi languages',
|
||||
'tur' => 'Turkish',
|
||||
'ota' => 'Turkish, Ottoman (1500-1928)',
|
||||
'tuk' => 'Turkmen',
|
||||
'tvl' => 'Tuvalu',
|
||||
'tyv' => 'Tuvinian',
|
||||
'twi' => 'Twi',
|
||||
'udm' => 'Udmurt',
|
||||
'uga' => 'Ugaritic',
|
||||
'uig' => 'Uighur',
|
||||
'ukr' => 'Ukrainian',
|
||||
'umb' => 'Umbundu',
|
||||
'und' => 'Undetermined',
|
||||
'hsb' => 'Upper Sorbian',
|
||||
'urd' => 'Urdu',
|
||||
'uzb' => 'Uzbek',
|
||||
'vai' => 'Vai',
|
||||
'cat' => 'Valencian',
|
||||
'ven' => 'Venda',
|
||||
'vie' => 'Vietnamese',
|
||||
'vol' => 'Volapuk',
|
||||
'vot' => 'Votic',
|
||||
'wak' => 'Wakashan languages',
|
||||
'wal' => 'Walamo',
|
||||
'wln' => 'Walloon',
|
||||
'war' => 'Waray',
|
||||
'was' => 'Washo',
|
||||
'wel' => 'Welsh',
|
||||
'cym' => 'Welsh',
|
||||
'fry' => 'Wester Frisian',
|
||||
'wol' => 'Wolof',
|
||||
'xho' => 'Xhosa',
|
||||
'sah' => 'Yakut',
|
||||
'yao' => 'Yao',
|
||||
'yap' => 'Yapese',
|
||||
'yid' => 'Yiddish',
|
||||
'yor' => 'Yoruba',
|
||||
'ypk' => 'Yupik languages',
|
||||
'znd' => 'Zande',
|
||||
'zap' => 'Zapotec',
|
||||
'zen' => 'Zenaga',
|
||||
'zha' => 'Zhuang',
|
||||
'zul' => 'Zulu',
|
||||
'zun' => 'Zuni'
|
||||
}
|
||||
|
||||
# Reports whether +value+ starts with a known language code.
#
# Only the primary subtag (the text before the first '-') is examined: it is
# lower-cased and looked up in ISO_LANG. Anything after the first '-' is
# ignored.
#
# value:: language tag string, e.g. "eng" or "eng-US"
#
# Returns true when the primary subtag is a key of ISO_LANG, false otherwise.
def is_valid_lang_code(value)
  # split yields [] for an empty value, hence the '' fallback.
  lang = value.split('-', 2).first || ''
  !!ISO_LANG[lang.downcase]
end
|
198
attic/vendor/plugins/HTML5lib/lib/html5/filters/optionaltags.rb
vendored
Normal file
198
attic/vendor/plugins/HTML5lib/lib/html5/filters/optionaltags.rb
vendored
Normal file
|
@ -0,0 +1,198 @@
|
|||
require 'html5/constants'
|
||||
require 'html5/filters/base'
|
||||
|
||||
module HTML5
  module Filters

    # Token-stream filter that drops the start and end tags which the HTML5
    # syntax allows to be omitted. Every other token is passed through
    # unchanged.
    class OptionalTagFilter < Base
      # Walks the wrapped stream with one token of look-behind and one token
      # of look-ahead.
      #
      # Yields (previous, current, following). +previous+ is nil for the first
      # token and +following+ is nil for the last one. Nothing is yielded when
      # the wrapped stream is empty.
      def slider
        previous1 = previous2 = nil
        __getobj__.each do |token|
          yield previous2, previous1, token unless previous1.nil?
          previous2 = previous1
          previous1 = token
        end
        # Flush the last token; skipped for an empty stream so that callers
        # never receive a nil "current" token.
        yield previous2, previous1, nil unless previous1.nil?
      end

      # Yields every token of the wrapped stream except omissible tags.
      # A start tag is only ever dropped when it carries no attributes.
      def each
        slider do |previous, token, nexttok|
          case token[:type]
          when :StartTag
            yield token unless token[:data].empty? && is_optional_start(token[:name], previous, nexttok)
          when :EndTag
            yield token unless is_optional_end(token[:name], nexttok)
          else
            yield token
          end
        end
      end

      # Decides whether the start tag of element +tagname+ may be omitted.
      #
      # tagname::  name of the element whose start tag is being considered
      # previous:: token preceding the start tag in the wrapped stream, or nil
      # nexttok::  token following the start tag in the wrapped stream, or nil
      #
      # Returns true or false.
      def is_optional_start(tagname, previous, nexttok)
        type = nexttok ? nexttok[:type] : nil
        case tagname
        when 'html'
          # An html element's start tag may be omitted if the first thing
          # inside the html element is not a space character or a comment.
          ![:Comment, :SpaceCharacters].include?(type)
        when 'head'
          # A head element's start tag may be omitted if the first thing
          # inside the head element is an element.
          type == :StartTag
        when 'body'
          # A body element's start tag may be omitted if the first thing
          # inside the body element is not a space character or a comment,
          # except if the first thing inside the body element is a script
          # or style element and the node immediately preceding the body
          # element is a head element whose end tag has been omitted.
          if [:Comment, :SpaceCharacters].include?(type)
            false
          elsif type == :StartTag
            # XXX: we do not look at the preceding event, so we never omit
            # the body element's start tag if it's followed by a script or
            # a style element.
            !%w[script style].include?(nexttok[:name])
          else
            true
          end
        when 'colgroup'
          # A colgroup element's start tag may be omitted if the first thing
          # inside the colgroup element is a col element, and if the element
          # is not immediately preceded by another colgroup element whose
          # end tag has been omitted.
          # XXX: we do not look at the preceding event, so instead we never
          # omit the colgroup element's end tag when it is immediately
          # followed by another colgroup element. See is_optional_end.
          type == :StartTag && nexttok[:name] == 'col'
        when 'tbody'
          # A tbody element's start tag may be omitted if the first thing
          # inside the tbody element is a tr element, and if the element is
          # not immediately preceded by a tbody, thead, or tfoot element
          # whose end tag has been omitted.
          return false unless type == :StartTag
          # The preceding end tag may itself be dropped by is_optional_end,
          # so the start tag is kept whenever it directly follows one.
          if previous && previous[:type] == :EndTag && %w[tbody thead tfoot].include?(previous[:name])
            return false
          end
          nexttok[:name] == 'tr'
        else
          false
        end
      end

      # Decides whether the end tag of element +tagname+ may be omitted.
      #
      # tagname:: name of the element whose end tag is being considered
      # nexttok:: token following the end tag in the wrapped stream, or nil
      #
      # Returns true or false.
      def is_optional_end(tagname, nexttok)
        type = nexttok ? nexttok[:type] : nil
        # "No more content in the parent element": the parent's end tag
        # follows, or the stream is exhausted.
        no_more_content = (type == :EndTag || type.nil?)

        case tagname
        when 'html', 'head', 'body'
          # The end tag of an html, head or body element is treated as
          # omissible if the element is not immediately followed by a space
          # character or a comment.
          ![:Comment, :SpaceCharacters].include?(type)
        when 'li', 'optgroup', 'option', 'tr'
          # The end tag of a li, optgroup, option or tr element may be
          # omitted if the element is immediately followed by another element
          # of the same kind, or if there is no more content in the parent
          # element.
          type == :StartTag ? nexttok[:name] == tagname : no_more_content
        when 'dt', 'dd'
          # A dt element's end tag may be omitted if the dt element is
          # immediately followed by another dt element or a dd element.
          # A dd element's end tag may be omitted if the dd element is
          # immediately followed by another dd element or a dt element,
          # or if there is no more content in the parent element.
          if type == :StartTag
            %w[dt dd].include?(nexttok[:name])
          else
            tagname == 'dd' && no_more_content
          end
        when 'p'
          # A p element's end tag may be omitted if the p element is
          # immediately followed by an address, blockquote, dl, fieldset,
          # form, h1, h2, h3, h4, h5, h6, hr, menu, ol, p, pre, table,
          # or ul element, or if there is no more content in the parent
          # element.
          if type == :StartTag
            %w[address blockquote dl fieldset form h1 h2 h3 h4 h5
               h6 hr menu ol p pre table ul].include?(nexttok[:name])
          else
            no_more_content
          end
        when 'colgroup'
          # A colgroup element's end tag may be omitted if the colgroup
          # element is not immediately followed by a space character or
          # a comment.
          if [:Comment, :SpaceCharacters].include?(type)
            false
          elsif type == :StartTag
            # XXX: we also look for an immediately following colgroup
            # element. See is_optional_start.
            nexttok[:name] != 'colgroup'
          else
            true
          end
        when 'thead', 'tbody'
          # A thead element's end tag may be omitted if the thead element
          # is immediately followed by a tbody or tfoot element.
          # A tbody element's end tag may be omitted if the tbody element
          # is immediately followed by a tbody or tfoot element, or if
          # there is no more content in the parent element.
          if type == :StartTag
            %w[tbody tfoot].include?(nexttok[:name])
          else
            tagname == 'tbody' && no_more_content
          end
        when 'tfoot'
          # A tfoot element's end tag may be omitted if the tfoot element
          # is immediately followed by a tbody element, or if there is no
          # more content in the parent element.
          type == :StartTag ? nexttok[:name] == 'tbody' : no_more_content
        when 'td', 'th'
          # The end tag of a td or th element may be omitted if the element
          # is immediately followed by a td or th element, or if there is
          # no more content in the parent element.
          type == :StartTag ? %w[td th].include?(nexttok[:name]) : no_more_content
        else
          false
        end
      end
    end
  end
end
|
30
attic/vendor/plugins/HTML5lib/lib/html5/filters/rfc2046.rb
vendored
Executable file
30
attic/vendor/plugins/HTML5lib/lib/html5/filters/rfc2046.rb
vendored
Executable file
|
@ -0,0 +1,30 @@
|
|||
# adapted from feedvalidator, original copyright license is
|
||||
#
|
||||
# Copyright (c) 2002-2006, Sam Ruby, Mark Pilgrim, Joseph Walton, and Phil Ringnalda
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
# A MIME "token": one or more characters that are neither whitespace nor one
# of the separator characters ( ) < > , ; : \ " / [ ] ? =
MIME_TOKEN = /[^\s()<>,;:\\"\/\[\]?=]+/

# type "/" subtype, followed by any number of ";" attribute "=" value
# parameters, where a value is either a token or a double-quoted string.
MIME_TYPE = /\A#{MIME_TOKEN}\/#{MIME_TOKEN}(?:\s*;\s*#{MIME_TOKEN}=(?:"(?:\\"|[^"])*"|#{MIME_TOKEN}))*\z/

# Reports whether +value+ is a syntactically valid MIME media type, with
# optional parameters (e.g. 'text/html; charset=utf-8').
#
# value:: the candidate media type; converted with to_s before matching
#
# Returns true or false.
def is_valid_mime_type(value)
  !!(MIME_TYPE =~ value.to_s)
end
|
||||
|
89
attic/vendor/plugins/HTML5lib/lib/html5/filters/rfc3987.rb
vendored
Executable file
89
attic/vendor/plugins/HTML5lib/lib/html5/filters/rfc3987.rb
vendored
Executable file
|
@ -0,0 +1,89 @@
|
|||
# adapted from feedvalidator, original copyright license is
|
||||
#
|
||||
# Copyright (c) 2002-2006, Sam Ruby, Mark Pilgrim, Joseph Walton, and Phil Ringnalda
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# URI schemes registered with IANA
# (http://www.iana.org/assignments/uri-schemes.html).
iana_schemes = [
  "ftp", "http", "gopher", "mailto", "news", "nntp", "telnet", "wais",
  "file", "prospero", "z39.50s", "z39.50r", "cid", "mid", "vemmi",
  "service", "imap", "nfs", "acap", "rtsp", "tip", "pop", "data", "dav",
  "opaquelocktoken", "sip", "sips", "tel", "fax", "modem", "ldap",
  "https", "soap.beep", "soap.beeps", "xmlrpc.beep", "xmlrpc.beeps",
  "urn", "go", "h323", "ipp", "tftp", "mupdate", "pres", "im", "mtqp",
  "iris.beep", "dict", "snmp", "crid", "tag", "dns", "info"
]
# Schemes accepted by is_valid_uri: the IANA list plus 'javascript'.
ALLOWED_SCHEMES = iana_schemes + ['javascript']

# URI reference: optional scheme, up to two slashes, then legal URI characters.
RFC2396 = Regexp.new("^([a-zA-Z][0-9a-zA-Z+\\-\\.]*:)?/{0,2}[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]*$", Regexp::MULTILINE)
# Absolute URI (scheme required). NOTE: this is a top-level local variable,
# so it is not visible inside method bodies.
rfc2396_full = Regexp.new("[a-zA-Z][0-9a-zA-Z+\\-\\.]*:(//)?[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]+$")

# URN: "urn:" NID ":" NSS. Written as a regexp literal and anchored to the
# whole string with \A / \z.
URN = /\A[Uu][Rr][Nn]:[a-zA-Z0-9][a-zA-Z0-9-]{1,31}:([a-zA-Z0-9()+,.:=@;$_!*'\-]|%[0-9A-Fa-f]{2})+\z/

# Tag URI: "tag:" authority "," date ":" specific [ "#" fragment ].
# Written as a regexp literal: inside a double-quoted string "\d" collapses
# to "d" and "\-" to "-", which breaks the date part and the character classes.
TAG = %r{\Atag:([a-z0-9\-._]+?@)?[a-z0-9.\-]+?,\d{4}(-\d{2}(-\d{2})?)?:[0-9a-zA-Z;/?:@&=+$.\-_!~*'()%,]*(#[0-9a-zA-Z;/?:@&=+$.\-_!~*'()%,]*)?\z}
|
||||
|
||||
# Validates +value+ as a URI reference.
#
# value::       the candidate URI string
# uri_pattern:: Regexp that must match the whole of +value+ (defaults to
#               RFC2396); "tag:" and "urn:" values are checked against TAG
#               and URN instead
#
# Returns a two-element array [valid, error_code]. error_code is "" on
# success, otherwise one of "invalid-tag-uri", "invalid-urn",
# "invalid-uri-char", "uri-not-iri", "invalid-uri",
# "invalid-http-or-ftp-uri" or "invalid-scheme".
def is_valid_uri(value, uri_pattern = RFC2396)
  # split yields [] for "" and for values made only of colons, so scheme
  # may be nil.
  scheme = value.split(':').first
  scheme = scheme.downcase if scheme

  if scheme == 'tag'
    return false, "invalid-tag-uri" unless TAG.match(value)
  elsif scheme == 'urn'
    return false, "invalid-urn" unless URN.match(value)
  else
    whole = uri_pattern.match(value)
    # An empty value, or a match covering only part of +value+ (possible
    # because ^ and $ are line anchors in Ruby), counts as "no match".
    if whole.nil? || whole[0].empty? || whole[0] != value
      uri_char = /\A[0-9a-zA-Z;\/?:@&=+$.\-_!~*'()%,#]\z/
      value.each_byte do |b|
        return false, "invalid-uri-char" if b < 128 && !uri_char.match([b].pack('c*'))
      end
      begin
        # Best effort: succeeds only if an 'idna' transcoder is available;
        # any failure to convert is ignored.
        return false, "uri-not-iri" if uri_pattern.match(value.encode('idna'))
      rescue StandardError
      end
      # Reached for every value that fails the pattern, not only for "".
      return false, "invalid-uri"
    end

    if ['http', 'ftp'].include?(scheme)
      return false, "invalid-http-or-ftp-uri" unless value =~ %r{\A\w+://[^/].*}
    elsif value.index(':') && scheme && scheme =~ /\A[a-z]+\z/ && !ALLOWED_SCHEMES.include?(scheme)
      return false, "invalid-scheme"
    end
  end

  return true, ""
end
|
||||
|
||||
# Validates +value+ as an IRI.
#
# A non-empty value is first converted with String#encode('idna'); if that
# conversion raises, the value is used unchanged. The result is then checked
# by is_valid_uri, whose [valid, error_code] pair is returned.
def is_valid_iri(value)
  candidate =
    begin
      value.length > 0 ? value.encode('idna') : value
    rescue StandardError
      value
    end
  is_valid_uri(candidate)
end
|
||||
|
||||
# Pattern for an absolute URI: a scheme, an optional "//", then at least one
# legal URI character. Held in a constant because top-level local variables
# are not visible inside method bodies.
RFC2396_FULL = Regexp.new("[a-zA-Z][0-9a-zA-Z+\\-\\.]*:(//)?[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]+$")

# Validates +value+ as a fully qualified (absolute) URI.
#
# Returns the [valid, error_code] pair produced by is_valid_uri when it is
# run with the absolute-URI pattern.
def is_valid_fully_qualified_uri(value)
  is_valid_uri(value, RFC2396_FULL)
end
|
15
attic/vendor/plugins/HTML5lib/lib/html5/filters/sanitizer.rb
vendored
Normal file
15
attic/vendor/plugins/HTML5lib/lib/html5/filters/sanitizer.rb
vendored
Normal file
|
@ -0,0 +1,15 @@
|
|||
require 'html5/filters/base'
|
||||
require 'html5/sanitizer'
|
||||
|
||||
module HTML5
  module Filters
    # Token-stream filter that passes every token of the wrapped stream
    # through HTMLSanitizeModule#sanitize_token before yielding it.
    class HTMLSanitizeFilter < Base
      include HTMLSanitizeModule

      # Yields the sanitized form of each token, in stream order.
      def each
        __getobj__.each { |token| yield(sanitize_token(token)) }
      end
    end
  end
end
|
830
attic/vendor/plugins/HTML5lib/lib/html5/filters/validator.rb
vendored
Executable file
830
attic/vendor/plugins/HTML5lib/lib/html5/filters/validator.rb
vendored
Executable file
|
@ -0,0 +1,830 @@
|
|||
# HTML 5 conformance checker
|
||||
#
|
||||
# Warning: this module is experimental, incomplete, and subject to removal at any time.
|
||||
#
|
||||
# Usage:
|
||||
# >>> from html5lib.html5parser import HTMLParser
|
||||
# >>> from html5lib.filters.validator import HTMLConformanceChecker
|
||||
# >>> p = HTMLParser(tokenizer=HTMLConformanceChecker)
|
||||
# >>> p.parse('<!doctype html>\n<html foo=bar></html>')
|
||||
# <<class 'html5lib.treebuilders.simpletree.Document'> nil>
|
||||
# >>> p.errors
|
||||
# [((2, 14), 'unknown-attribute', {'attributeName' => u'foo', 'tagName' => u'html'})]
|
||||
|
||||
require 'html5/constants'
|
||||
require 'html5/filters/base'
|
||||
require 'html5/filters/iso639codes'
|
||||
require 'html5/filters/rfc3987'
|
||||
require 'html5/filters/rfc2046'
|
||||
|
||||
# Gettext-style marker wrapped around message strings; returns +str+ itself,
# unchanged.
def _(str)
  str
end
|
||||
|
||||
class String
  # Converts a CamelCase (optionally Namespaced::) name to its
  # lower-case, underscore-separated form. Lifted from Rails.
  #
  #   "StartTag".underscore  # => "start_tag"
  #   "A::HTMLParser".underscore  # => "a/html_parser"
  def underscore
    text = gsub(/::/, '/')
    # Split an acronym from the capitalised word that follows it.
    text = text.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
    # Split a lower-case letter or digit from the capital that follows it.
    text = text.gsub(/([a-z\d])([A-Z])/, '\1_\2')
    text.tr("-", "_").downcase
  end
end
|
||||
|
||||
# Registers the conformance checker's error messages in HTML5::E, keyed by
# error code. %(name) placeholders are named after entries of a ParseError
# token's :datavars.
HTML5::E.update({
  "unknown-start-tag" =>
    _("Unknown start tag <%(tagName)>."),
  "unknown-attribute" =>
    _("Unknown '%(attributeName)' attribute on <%(tagName)>."),
  "missing-required-attribute" =>
    _("The '%(attributeName)' attribute is required on <%(tagName)>."),
  "unknown-input-type" =>
    _("Illegal value for attribute on <input type='%(inputType)'>."),
  "attribute-not-allowed-on-this-input-type" =>
    _("The '%(attributeName)' attribute is not allowed on <input type=%(inputType)>."),
  "deprecated-attribute" =>
    _("This attribute is deprecated: '%(attributeName)' attribute on <%(tagName)>."),
  "duplicate-value-in-token-list" =>
    _("Duplicate value in token list: '%(attributeValue)' in '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-attribute-value" =>
    _("Invalid attribute value: '%(attributeName)' attribute on <%(tagName)>."),
  "space-in-id" =>
    _("Whitespace is not allowed here: '%(attributeName)' attribute on <%(tagName)>."),
  "duplicate-id" =>
    _("This ID was already defined earlier: 'id' attribute on <%(tagName)>."),
  "attribute-value-can-not-be-blank" =>
    _("This value can not be blank: '%(attributeName)' attribute on <%(tagName)>."),
  "id-does-not-exist" =>
    _("This value refers to a non-existent ID: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-enumerated-value" =>
    _("Value must be one of %(enumeratedValues): '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-boolean-value" =>
    _("Value must be one of %(enumeratedValues): '%(attributeName)' attribute on <%(tagName)>."),
  "contextmenu-must-point-to-menu" =>
    _("The contextmenu attribute must point to an ID defined on a <menu> element."),
  "invalid-lang-code" =>
    _("Invalid language code: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-integer-value" =>
    _("Value must be an integer: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-root-namespace" =>
    _("Root namespace must be 'http://www.w3.org/1999/xhtml', or omitted."),
  "invalid-browsing-context" =>
    _("Value must be one of ('_self', '_parent', '_top'), or a name that does not start with '_' => '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-tag-uri" =>
    _("Invalid URI: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-urn" =>
    _("Invalid URN: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-uri-char" =>
    _("Illegal character in URI: '%(attributeName)' attribute on <%(tagName)>."),
  "uri-not-iri" =>
    _("Expected a URI but found an IRI: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-uri" =>
    _("Invalid URI: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-http-or-ftp-uri" =>
    _("Invalid URI: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-scheme" =>
    _("Unregistered URI scheme: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-rel" =>
    _("Invalid link relation: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-mime-type" =>
    _("Invalid MIME type: '%(attributeName)' attribute on <%(tagName)>."),
})
|
||||
|
||||
|
||||
class HTMLConformanceChecker < HTML5::Filters::Base
|
||||
|
||||
# Attribute names accepted on every element.
@@global_attributes = %w[
  class contenteditable contextmenu dir
  draggable id irrelevant lang ref tabindex template
  title onabort onbeforeunload onblur onchange onclick
  oncontextmenu ondblclick ondrag ondragend ondragenter
  ondragleave ondragover ondragstart ondrop onerror
  onfocus onkeydown onkeypress onkeyup onload onmessage
  onmousedown onmousemove onmouseout onmouseover onmouseup
  onmousewheel onresize onscroll onselect onsubmit onunload
]
# XXX lang in HTML only, xml:lang in XHTML only
# XXX validate ref, template
|
||||
|
||||
# Element name => element-specific attribute names (global attributes are
# listed separately). An element that is not a key here is reported as an
# unknown start tag.
@@allowed_attribute_map = {
  'html'         => %w[xmlns],
  'head'         => [],
  'title'        => [],
  'base'         => %w[href target],
  'link'         => %w[href rel media hreflang type],
  'meta'         => %w[name http-equiv content charset], # XXX charset in HTML only
  'style'        => %w[media type scoped],
  'body'         => [],
  'section'      => [],
  'nav'          => [],
  'article'      => [],
  'blockquote'   => %w[cite],
  'aside'        => [],
  'h1'           => [],
  'h2'           => [],
  'h3'           => [],
  'h4'           => [],
  'h5'           => [],
  'h6'           => [],
  'header'       => [],
  'footer'       => [],
  'address'      => [],
  'p'            => [],
  'hr'           => [],
  'br'           => [],
  'dialog'       => [],
  'pre'          => [],
  'ol'           => %w[start],
  'ul'           => [],
  'li'           => %w[value], # XXX depends on parent
  'dl'           => [],
  'dt'           => [],
  'dd'           => [],
  'a'            => %w[href target ping rel media hreflang type],
  'q'            => %w[cite],
  'cite'         => [],
  'em'           => [],
  'strong'       => [],
  'small'        => [],
  'm'            => [],
  'dfn'          => [],
  'abbr'         => [],
  'time'         => %w[datetime],
  'meter'        => %w[value min low high max optimum],
  'progress'     => %w[value max],
  'code'         => [],
  'var'          => [],
  'samp'         => [],
  'kbd'          => [],
  'sup'          => [],
  'sub'          => [],
  'span'         => [],
  'i'            => [],
  'b'            => [],
  'bdo'          => [],
  'ins'          => %w[cite datetime],
  'del'          => %w[cite datetime],
  'figure'       => [],
  'img'          => %w[alt src usemap ismap height width], # XXX ismap depends on parent
  'iframe'       => %w[src],
  # <embed> handled separately
  'object'       => %w[data type usemap height width],
  'param'        => %w[name value],
  'video'        => %w[src autoplay start loopstart loopend end loopcount controls],
  'audio'        => %w[src autoplay start loopstart loopend end loopcount controls],
  'source'       => %w[src type media],
  'canvas'       => %w[height width],
  'map'          => [],
  'area'         => %w[alt coords shape href target ping rel media hreflang type],
  'table'        => [],
  'caption'      => [],
  'colgroup'     => %w[span], # XXX only if element contains no <col> elements
  'col'          => %w[span],
  'tbody'        => [],
  'thead'        => [],
  'tfoot'        => [],
  'tr'           => [],
  'td'           => %w[colspan rowspan],
  'th'           => %w[colspan rowspan scope],
  # all possible <input> attributes are listed here but <input> is really handled separately
  'input'        => %w[
    accept accesskey action alt autocomplete autofocus checked
    disabled enctype form inputmode list maxlength method min
    max name pattern step readonly replace required size src
    tabindex target template value
  ],
  'form'         => %w[
    action method enctype accept name onsubmit onreset accept-charset
    data replace
  ],
  # XXX may need matrix of acceptable attributes based on value of type attribute (like input)
  'button'       => %w[action enctype method replace template name value type disabled form autofocus],
  'select'       => %w[name size multiple disabled data accesskey form autofocus],
  'optgroup'     => %w[disabled label],
  'option'       => %w[selected disabled label value],
  'textarea'     => %w[maxlength name rows cols disabled readonly required form autofocus wrap accept],
  'label'        => %w[for accesskey form],
  'fieldset'     => %w[disabled form],
  'output'       => %w[form name for onforminput onformchange],
  'datalist'     => %w[data],
  # XXX repetition model for repeating form controls
  'script'       => %w[src defer async type],
  'noscript'     => [],
  'noembed'      => [],
  'event-source' => %w[src],
  'details'      => %w[open],
  'datagrid'     => %w[multiple disabled],
  'command'      => %w[type label icon hidden disabled checked radiogroup default],
  'menu'         => %w[type label autosubmit],
  'datatemplate' => [],
  'rule'         => [],
  'nest'         => [],
  'legend'       => [],
  'div'          => [],
  'font'         => %w[style]
}
|
||||
|
||||
# Element name => attribute names that must be present on its start tag.
@@required_attribute_map = {
  'link'   => %w[href rel],
  'bdo'    => %w[dir],
  'img'    => %w[src],
  'embed'  => %w[src],
  'object' => [], # XXX one of 'data' or 'type' is required
  'param'  => %w[name value],
  'source' => %w[src],
  'map'    => %w[id]
}
|
||||
|
||||
# Value of <input type=...> => attribute names allowed on an input of that
# type. A type that is not a key here is reported as an unknown input type.
@@input_type_allowed_attribute_map = {
  'text'           => %w[accesskey autocomplete autofocus disabled form inputmode list maxlength
                         name pattern readonly required size tabindex value],
  'password'       => %w[accesskey autocomplete autofocus disabled form inputmode maxlength
                         name pattern readonly required size tabindex value],
  'checkbox'       => %w[accesskey autofocus checked disabled form name required tabindex value],
  'radio'          => %w[accesskey autofocus checked disabled form name required tabindex value],
  'button'         => %w[accesskey autofocus disabled form name tabindex value],
  'submit'         => %w[accesskey action autofocus disabled enctype form method name replace
                         tabindex target value],
  'reset'          => %w[accesskey autofocus disabled form name tabindex value],
  'add'            => %w[accesskey autofocus disabled form name tabindex template value],
  'remove'         => %w[accesskey autofocus disabled form name tabindex value],
  'move-up'        => %w[accesskey autofocus disabled form name tabindex value],
  'move-down'      => %w[accesskey autofocus disabled form name tabindex value],
  'file'           => %w[accept accesskey autofocus disabled form min max name required tabindex],
  'hidden'         => %w[disabled form name value],
  'image'          => %w[accesskey action alt autofocus disabled enctype form method name replace
                         src tabindex target],
  'datetime'       => %w[accesskey autocomplete autofocus disabled form list min max name step
                         readonly required tabindex value],
  'datetime-local' => %w[accesskey autocomplete autofocus disabled form list min max name step
                         readonly required tabindex value],
  'date'           => %w[accesskey autocomplete autofocus disabled form list min max name step
                         readonly required tabindex value],
  'month'          => %w[accesskey autocomplete autofocus disabled form list min max name step
                         readonly required tabindex value],
  'week'           => %w[accesskey autocomplete autofocus disabled form list min max name step
                         readonly required tabindex value],
  'time'           => %w[accesskey autocomplete autofocus disabled form list min max name step
                         readonly required tabindex value],
  'number'         => %w[accesskey autocomplete autofocus disabled form list min max name step
                         readonly required tabindex value],
  'range'          => %w[accesskey autocomplete autofocus disabled form list min max name step
                         readonly required tabindex value],
  'email'          => %w[accesskey autocomplete autofocus disabled form inputmode list maxlength
                         name pattern readonly required tabindex value],
  'url'            => %w[accesskey autocomplete autofocus disabled form inputmode list maxlength
                         name pattern readonly required tabindex value],
}
|
||||
|
||||
# Value of <input type=...> => attribute names reported as deprecated on an
# input of that type.
@@input_type_deprecated_attribute_map = {
  'text'     => ['size'],
  'password' => ['size']
}

# Link-relation keyword lists, one for <link rel> and one for <a rel>.
@@link_rel_values = %w[
  alternate archive archives author contact feed first begin start help icon
  index top contents toc last end license copyright next pingback prefetch
  prev previous search stylesheet sidebar tag up
]
@@a_rel_values = %w[
  alternate archive archives author contact feed first begin start help
  index top contents toc last end license copyright next prev previous
  search sidebar tag up bookmark external nofollow
]
|
||||
|
||||
# Wraps an HTML5::HTMLTokenizer built from +stream+ and +args+, and starts
# with empty ID bookkeeping lists.
def initialize(stream, *args)
  tokenizer = HTML5::HTMLTokenizer.new(stream, *args)
  super(tokenizer)
  @things_that_define_an_id     = []
  @things_that_point_to_an_id   = []
  @ids_we_have_known_and_loved  = []
end
|
||||
|
||||
# Yields every token of the wrapped stream, preceded by whatever its
# validator yields.
#
# For each token the validator method is looked up by name: first
# "validate_<type>_<name>", then "validate_<type>" (type and name
# underscored, '-' standing in for a missing key). Only the first one this
# object responds to is called. After the stream is exhausted, whatever
# +eof+ yields is passed on as well.
def each
  __getobj__.each do |token|
    kind = token.fetch(:type, '-').to_s.underscore
    candidates = ["validate_#{kind}_#{token.fetch(:name, '-').to_s.underscore}",
                  "validate_#{kind}"]
    handler = candidates.find { |candidate| respond_to?(candidate) }
    send(handler, token) { |report| yield report } if handler
    yield token
  end
  eof { |report| yield report }
end
|
||||
|
||||
##########################################################################
|
||||
# Start tag validation
|
||||
##########################################################################
|
||||
|
||||
# Generic start-tag validation: runs, in order, the unknown-tag check, the
# required-attribute check, the unknown-attribute check and the
# attribute-value check, passing on everything they yield.
def validate_start_tag(token)
  [:check_unknown_start_tag,
   :check_start_tag_required_attributes,
   :check_start_tag_unknown_attributes,
   :check_attribute_values].each do |check|
    send(check, token) { |report| yield report }
  end
end
|
||||
|
||||
# Start-tag validation for <embed>: required attributes and attribute
# values only.
def validate_start_tag_embed(token)
  # spec says "any attributes w/o namespace"
  # so don't call check_start_tag_unknown_attributes
  [:check_start_tag_required_attributes, :check_attribute_values].each do |check|
    send(check, token) { |report| yield report }
  end
end
|
||||
|
||||
# Start-tag validation for <input>.
#
# Runs the attribute-value check, then validates the attribute names against
# the list allowed for the input's type (attribute "type", default "text").
# Yields a ParseError token for an unknown type, for each attribute unknown
# on <input>, for each attribute not allowed on this type, and for each
# attribute deprecated on this type.
#
# token:: start-tag token; token[:data] holds [name, value] pairs (a missing
#         :data is treated as no attributes)
def validate_start_tag_input(token)
  check_attribute_values(token) { |report| yield report }

  # Lower-cased attribute name => value; a later duplicate wins.
  attr_dict = {}
  (token[:data] || []).each { |(name, value)| attr_dict[name.downcase] = value }

  input_type = attr_dict.fetch('type', "text")
  unless @@input_type_allowed_attribute_map.key?(input_type)
    # "inputType" is the placeholder the unknown-input-type message uses;
    # :attrValue is kept for existing consumers.
    yield({:type => "ParseError",
           :data => "unknown-input-type",
           :datavars => {:attrValue => input_type,
                         "inputType" => input_type}})
  end

  allowed_attributes = @@input_type_allowed_attribute_map.fetch(input_type, [])
  deprecated_attributes = @@input_type_deprecated_attribute_map.fetch(input_type, [])

  attr_dict.each do |attr_name, attr_value|
    if !@@allowed_attribute_map['input'].include?(attr_name)
      yield({:type => "ParseError",
             :data => "unknown-attribute",
             :datavars => {"tagName" => "input",
                           "attributeName" => attr_name}})
    elsif !allowed_attributes.include?(attr_name)
      yield({:type => "ParseError",
             :data => "attribute-not-allowed-on-this-input-type",
             :datavars => {"attributeName" => attr_name,
                           "inputType" => input_type}})
    end

    if deprecated_attributes.include?(attr_name)
      # "tagName" is the placeholder the deprecated-attribute message uses.
      yield({:type => "ParseError",
             :data => "deprecated-attribute",
             :datavars => {"tagName" => "input",
                           "attributeName" => attr_name,
                           "inputType" => input_type}})
    end
  end
end
|
||||
|
||||
##########################################################################
|
||||
# Start tag validation helpers
|
||||
##########################################################################
|
||||
|
||||
# Yields an "unknown-start-tag" ParseError when the token's lower-cased
# element name (empty when the token has no name) is not a key of
# @@allowed_attribute_map.
def check_unknown_start_tag(token)
  name = (token[:name] || "").downcase
  return if @@allowed_attribute_map.has_key?(name)

  yield({:type => "ParseError",
         :data => "unknown-start-tag",
         :datavars => {"tagName" => name}})
end
|
||||
|
||||
# Yields a "missing-required-attribute" ParseError for every attribute that
# the required-attribute map lists for this tag but the token does not carry.
#
# Attribute names on the token are lower-cased before the comparison, the
# same way the other start-tag checks treat them, so that e.g. SRC= satisfies
# a requirement for "src".
def check_start_tag_required_attributes(token)
  name = (token[:name] || "").downcase
  return unless @@required_attribute_map.keys.include?(name)

  present = (token[:data] || []).collect { |pair| pair[0].downcase }
  @@required_attribute_map[name].each do |attr_name|
    next if present.include?(attr_name)
    yield({:type => "ParseError",
           :data => "missing-required-attribute",
           :datavars => {"tagName" => name,
                         "attributeName" => attr_name}})
  end
end
|
||||
|
||||
# Yields an "unknown-attribute" ParseError for each attribute whose
# lower-cased name is neither a global attribute nor listed for this tag.
# The reported attributeName keeps the spelling found on the token.
def check_start_tag_unknown_attributes(token)
  name = token[:name].downcase
  recognised = @@global_attributes | @@allowed_attribute_map.fetch(name, [])

  token.fetch(:data, []).each do |attr_name, _attr_value|
    next if recognised.include?(attr_name.downcase)
    yield({:type => "ParseError",
           :data => "unknown-attribute",
           :datavars => {"tagName" => name,
                         "attributeName" => attr_name}})
  end
end
|
||||
|
||||
##########################################################################
|
||||
# Attribute validation helpers
|
||||
##########################################################################
|
||||
|
||||
# def checkURI(token, tag_name, attr_name, attr_value)
|
||||
# is_valid, error_code = rfc3987.is_valid_uri(attr_value)
|
||||
# if not is_valid
|
||||
# yield {:type => "ParseError",
|
||||
# :data => error_code,
|
||||
# :datavars => {"tagName" => tag_name,
|
||||
# "attributeName" => attr_name}}
|
||||
# yield {:type => "ParseError",
|
||||
# :data => "invalid-attribute-value",
|
||||
# :datavars => {"tagName" => tag_name,
|
||||
# "attributeName" => attr_name}}
|
||||
|
||||
# Validates +attr_value+ with is_valid_iri. On failure two ParseErrors are
# yielded: first the specific error code returned by is_valid_iri, then the
# generic "invalid-attribute-value".
def check_iri(token, tag_name, attr_name, attr_value)
  valid, reason = is_valid_iri(attr_value)
  return if valid

  [reason, "invalid-attribute-value"].each do |code|
    yield({:type => "ParseError",
           :data => code,
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
  end
end
|
||||
|
||||
# Validates an ID-valued attribute.
#
# Yields "attribute-value-can-not-be-blank" when the value is nil or empty.
# Otherwise, if the value contains any of HTML5::SPACE_CHARACTERS, yields
# "space-in-id" followed by "invalid-attribute-value" (once, no matter how
# many space characters there are).
def check_id(token, tag_name, attr_name, attr_value)
  if !attr_value || attr_value.length == 0
    yield({:type => "ParseError",
           :data => "attribute-value-can-not-be-blank",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
    # Nothing left to scan; a nil value used to crash on each_byte below.
    return
  end

  attr_value.each_byte do |byte|
    next unless HTML5::SPACE_CHARACTERS.include?([byte].pack('c*'))

    ["space-in-id", "invalid-attribute-value"].each do |code|
      yield({:type => "ParseError",
             :data => code,
             :datavars => {"tagName" => tag_name,
                           "attributeName" => attr_name}})
    end
    break
  end
end
|
||||
|
||||
# Splits a space-separated attribute value into its tokens.
#
# Tokens are separated by runs of HTML5::SPACE_CHARACTERS; leading and
# trailing separators produce no empty tokens. A nil value yields [].
#
# The string is split as a whole rather than rebuilt byte by byte, so tokens
# containing multi-byte characters keep the encoding of the input.
def parse_token_list(value)
  separators = /[#{Regexp.escape(HTML5::SPACE_CHARACTERS.join)}]+/
  value.to_s.split(separators).reject { |item| item.empty? }
end
|
||||
|
||||
# Checks a space-separated attribute value for repeated entries.
#
# "Token" here means an entry of the attribute value
# (http://www.whatwg.org/specs/web-apps/current-work/#set-of), not a
# tokenizer token; unlike the other check_* helpers this method takes no
# tokenizer token argument.
#
# Yields one "duplicate-value-in-token-list" ParseError for the first
# repeated entry and then stops.
def check_token_list(tag_name, attr_name, attr_value)
  seen = {}
  parse_token_list(attr_value).each do |entry|
    if seen.has_key?(entry)
      yield({:type => "ParseError",
             :data => "duplicate-value-in-token-list",
             :datavars => {"tagName" => tag_name,
                           "attributeName" => attr_name,
                           "attributeValue" => entry}})
      break
    end
    seen[entry] = 1
  end
end
|
||||
|
||||
# Validates an enumerated attribute.
#
# Yields "attribute-value-can-not-be-blank" for a nil/empty value. Otherwise
# the value is compared case-insensitively (lower-cased copy) against
# +enumerated_values+; on a mismatch "invalid-enumerated-value" and then
# "invalid-attribute-value" are yielded.
#
# The caller's string is never modified, so frozen values are accepted and
# the token's attribute data keeps its original case.
def check_enumerated_value(token, tag_name, attr_name, attr_value, enumerated_values)
  if !attr_value || attr_value.length == 0
    yield({:type => "ParseError",
           :data => "attribute-value-can-not-be-blank",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
    return
  end

  return if enumerated_values.include?(attr_value.downcase)

  yield({:type => "ParseError",
         :data => "invalid-enumerated-value",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name,
                       "enumeratedValues" => enumerated_values}})
  yield({:type => "ParseError",
         :data => "invalid-attribute-value",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name}})
end
|
||||
|
||||
# Validates a boolean attribute: the value must be exactly the attribute's
# own name or the empty string. Anything else yields "invalid-boolean-value"
# followed by "invalid-attribute-value".
def check_boolean(token, tag_name, attr_name, attr_value)
  permitted = [attr_name, '']
  return if permitted.include?(attr_value)

  yield({:type => "ParseError",
         :data => "invalid-boolean-value",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name,
                       "enumeratedValues" => permitted}})
  yield({:type => "ParseError",
         :data => "invalid-attribute-value",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name}})
end
|
||||
|
||||
# Validates an integer attribute with a small state machine:
#
#   :begin          - skipping leading space characters
#   :initial_number - a '-' was seen, a digit must follow
#   :in_number      - collecting digits
#   :trailing_junk  - a non-digit followed the number; the rest is ignored
#
# Yields "invalid-integer-value" (and stops) when the first significant
# character is neither '-' nor a digit, or when '-' is not followed by a
# digit. Yields "attribute-value-can-not-be-blank" when no digit was found.
# Characters are taken from scan(/./), so newlines are never examined.
def check_integer(token, tag_name, attr_name, attr_value)
  digits = ''
  state = :begin
  invalid = {:type => "ParseError",
             :data => "invalid-integer-value",
             :datavars => {"tagName" => tag_name,
                           "attributeName" => attr_name,
                           "attributeValue" => attr_value}}

  attr_value.scan(/./) do |char|
    case state
    when :begin
      next if HTML5::SPACE_CHARACTERS.include?(char)
      if char == '-'
        state = :initial_number
      elsif HTML5::DIGITS.include?(char)
        digits += char
        state = :in_number
      else
        yield invalid
        return
      end
    when :initial_number
      unless HTML5::DIGITS.include?(char)
        yield invalid
        return
      end
      digits += char
      state = :in_number
    when :in_number
      if HTML5::DIGITS.include?(char)
        digits += char
      else
        state = :trailing_junk
      end
    end
  end

  return unless digits.length == 0
  yield({:type => "ParseError",
         :data => "attribute-value-can-not-be-blank",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name}})
end
|
||||
|
||||
# Placeholder: floating-point attribute values are not validated yet, so
# every value is accepted and nothing is yielded.
def check_floating_point_number(token, tag_name, attr_name, attr_value)
  nil # XXX not implemented
end
|
||||
|
||||
# Validates a browsing-context name (e.g. a target attribute).
#
# Only names starting with "_" are restricted: they must be one of _self,
# _parent, _top or _blank (case-insensitive), otherwise an
# "invalid-browsing-context" ParseError is yielded. nil, empty and ordinary
# names are accepted. The caller's string is not modified.
def check_browsing_context(token, tag_name, attr_name, attr_value)
  return unless attr_value
  # [0, 1] yields a one-character string on both Ruby 1.8 and 1.9.
  return unless attr_value[0, 1] == '_'
  return if ['_self', '_parent', '_top', '_blank'].include?(attr_value.downcase)

  yield({:type => "ParseError",
         :data => "invalid-browsing-context",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name}})
end
|
||||
|
||||
# Validates a language-code attribute. A nil or empty value is accepted;
# any other value rejected by is_valid_lang_code yields "invalid-lang-code".
def check_lang_code(token, tag_name, attr_name, attr_value)
  return if !attr_value || attr_value == '' # blank is OK
  return if is_valid_lang_code(attr_value)

  yield({:type => "ParseError",
         :data => "invalid-lang-code",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name,
                       "attributeValue" => attr_value}})
end
|
||||
|
||||
# Validates a MIME-type attribute.
#
# A nil or empty value yields "attribute-value-can-not-be-blank" and is not
# checked further (is_valid_mime_type is never called with a blank value).
# Any other value rejected by is_valid_mime_type yields "invalid-mime-type".
def check_mime_type(token, tag_name, attr_name, attr_value)
  # XXX needs tests
  if attr_value.nil? || attr_value.empty?
    yield({:type => "ParseError",
           :data => "attribute-value-can-not-be-blank",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
    return
  end

  return if is_valid_mime_type(attr_value)

  yield({:type => "ParseError",
         :data => "invalid-mime-type",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name,
                       "attributeValue" => attr_value}})
end
|
||||
|
||||
# Placeholder: media-query attribute values are not validated yet, so every
# value is accepted and nothing is yielded.
def check_media_query(token, tag_name, attr_name, attr_value)
  nil # XXX not implemented
end
|
||||
|
||||
# Validates a rel attribute. Duplicate entries are reported through
# check_token_list; afterwards every entry that is not in the allowed set
# (the <link> set for 'link', the <a> set for any other tag) yields an
# "invalid-rel" ParseError.
def check_link_relation(token, tag_name, attr_name, attr_value)
  check_token_list(tag_name, attr_name, attr_value) { |error| yield error }

  relations = parse_token_list(attr_value)
  permitted = (tag_name == 'link') ? @@link_rel_values : @@a_rel_values

  relations.each do |relation|
    next if permitted.include?(relation)
    yield({:type => "ParseError",
           :data => "invalid-rel",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
  end
end
|
||||
|
||||
# Placeholder for date/time attribute validation: every value is accepted
# and nothing is yielded.
#
# XXX not implemented. The original sketch was a character-by-character
# state machine starting in a 'begin' state that skips space characters and
# then moves on when it sees a digit.
def check_date_time(token, tag_name, attr_name, attr_value)
  nil
end
|
||||
|
||||
##########################################################################
|
||||
# Attribute validation
|
||||
##########################################################################
|
||||
|
||||
# Dispatches every attribute of +token+ to its value validator, if any.
#
# For an attribute the tag-specific method
# validate_attribute_value_<tag>_<attr> is preferred; when this object does
# not respond to it, the generic validate_attribute_value_<attr> is tried.
# Attributes with no validator are ignored. Errors from the validators are
# re-yielded to the caller. The attribute name is lower-cased before the
# lookup and before being passed on.
def check_attribute_values(token)
  tag_name = token.fetch(:name, "")

  token.fetch(:data, []).each do |attr_name, attr_value|
    attr_name = attr_name.downcase
    attr_part = attr_name.to_s.underscore
    candidates = ["validate_attribute_value_#{tag_name.to_s.underscore}_#{attr_part}",
                  "validate_attribute_value_#{attr_part}"]

    validator = candidates.find { |name| respond_to?(name) }
    next unless validator

    send(validator, token, tag_name, attr_name, attr_value) { |error| yield error }
  end
end
|
||||
|
||||
# Validates the class attribute as a token list. Each error reported by
# check_token_list is re-yielded, followed by a generic
# "invalid-attribute-value" ParseError.
def validate_attribute_value_class(token, tag_name, attr_name, attr_value)
  check_token_list(tag_name, attr_name, attr_value) do |error|
    generic = {:type => "ParseError",
               :data => "invalid-attribute-value",
               :datavars => {"tagName" => tag_name,
                             "attributeName" => attr_name}}
    [error, generic].each { |problem| yield problem }
  end
end
|
||||
|
||||
# contenteditable is an enumerated attribute; errors from
# check_enumerated_value are passed straight through.
def validate_attribute_value_contenteditable(token, tag_name, attr_name, attr_value)
  allowed = ['true', 'false', '']
  check_enumerated_value(token, tag_name, attr_name, attr_value, allowed) { |error| yield error }
end
|
||||
|
||||
# dir is an enumerated attribute ('ltr' or 'rtl'); errors from
# check_enumerated_value are passed straight through.
def validate_attribute_value_dir(token, tag_name, attr_name, attr_value)
  allowed = ['ltr', 'rtl']
  check_enumerated_value(token, tag_name, attr_name, attr_value, allowed) { |error| yield error }
end
|
||||
|
||||
# draggable is an enumerated attribute ('true' or 'false'); errors from
# check_enumerated_value are passed straight through.
def validate_attribute_value_draggable(token, tag_name, attr_name, attr_value)
  allowed = ['true', 'false']
  check_enumerated_value(token, tag_name, attr_name, attr_value, allowed) { |error| yield error }
end
|
||||
|
||||
# Attribute validators that are plain aliases of a generic checker.
alias_method :validate_attribute_value_irrelevant, :check_boolean
alias_method :validate_attribute_value_lang, :check_lang_code
|
||||
|
||||
# Validates contextmenu as an ID value and, as a side effect, records the
# token in @things_that_point_to_an_id.
def validate_attribute_value_contextmenu(token, tag_name, attr_name, attr_value)
  check_id(token, tag_name, attr_name, attr_value) { |error| yield error }
  @things_that_point_to_an_id << token
end
|
||||
|
||||
# Validates the id attribute and records it.
#
# Side effects: unless the value is nil, it is appended to
# @ids_we_have_known_and_loved and the token to @things_that_define_an_id,
# so that later checks can find duplicated IDs and verify that ID
# references (like <label for> or <span contextmenu>) point at an ID that
# exists. A value already recorded yields a "duplicate-id" ParseError (and
# is recorded again).
def validate_attribute_value_id(token, tag_name, attr_name, attr_value)
  check_id(token, tag_name, attr_name, attr_value) { |error| yield error }
  return unless attr_value

  already_seen = @ids_we_have_known_and_loved.include?(attr_value)
  yield({:type => "ParseError",
         :data => "duplicate-id",
         :datavars => {"tagName" => tag_name}}) if already_seen

  @ids_we_have_known_and_loved << attr_value
  @things_that_define_an_id << token
end
|
||||
|
||||
# tabindex values are validated as integers.
alias_method :validate_attribute_value_tabindex, :check_integer
|
||||
|
||||
# Placeholder: the ref attribute is not validated yet; every value is
# accepted and nothing is yielded.
def validate_attribute_value_ref(token, tag_name, attr_name, attr_value)
  nil # XXX not implemented
end
|
||||
|
||||
# Placeholder: the template attribute is not validated yet; every value is
# accepted and nothing is yielded.
def validate_attribute_value_template(token, tag_name, attr_name, attr_value)
  nil # XXX not implemented
end
|
||||
|
||||
# The xmlns attribute on <html> must be exactly the XHTML namespace;
# anything else yields an "invalid-root-namespace" ParseError.
def validate_attribute_value_html_xmlns(token, tag_name, attr_name, attr_value)
  return if attr_value == "http://www.w3.org/1999/xhtml"

  yield({:type => "ParseError",
         :data => "invalid-root-namespace",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name}})
end
|
||||
|
||||
# Element-specific attribute validators that are plain aliases of a generic
# checker (method name pattern: validate_attribute_value_<tag>_<attribute>).
alias_method :validate_attribute_value_base_href, :check_iri
alias_method :validate_attribute_value_base_target, :check_browsing_context
alias_method :validate_attribute_value_link_href, :check_iri
alias_method :validate_attribute_value_link_rel, :check_link_relation
alias_method :validate_attribute_value_link_media, :check_media_query
alias_method :validate_attribute_value_link_hreflang, :check_lang_code
alias_method :validate_attribute_value_link_type, :check_mime_type
# XXX <meta> attributes
alias_method :validate_attribute_value_style_media, :check_media_query
alias_method :validate_attribute_value_style_type, :check_mime_type
alias_method :validate_attribute_value_style_scoped, :check_boolean
alias_method :validate_attribute_value_blockquote_cite, :check_iri
alias_method :validate_attribute_value_ol_start, :check_integer
alias_method :validate_attribute_value_li_value, :check_integer
# XXX need tests from here on
alias_method :validate_attribute_value_a_href, :check_iri
alias_method :validate_attribute_value_a_target, :check_browsing_context
|
||||
|
||||
# Validates <a ping>: the value is a space-separated list and every entry
# must be a valid IRI. Errors from check_iri are re-yielded per entry.
def validate_attribute_value_a_ping(token, tag_name, attr_name, attr_value)
  parse_token_list(attr_value).each do |current_value|
    # Check the individual entry, not the whole attribute value.
    check_iri(token, tag_name, attr_name, current_value) { |error| yield error }
  end
end
|
||||
|
||||
# More element-specific validators aliased to generic checkers
# (method name pattern: validate_attribute_value_<tag>_<attribute>).
alias_method :validate_attribute_value_a_rel, :check_link_relation
alias_method :validate_attribute_value_a_media, :check_media_query
alias_method :validate_attribute_value_a_hreflang, :check_lang_code
alias_method :validate_attribute_value_a_type, :check_mime_type
alias_method :validate_attribute_value_q_cite, :check_iri
alias_method :validate_attribute_value_time_datetime, :check_date_time
alias_method :validate_attribute_value_meter_value, :check_floating_point_number
alias_method :validate_attribute_value_meter_min, :check_floating_point_number
alias_method :validate_attribute_value_meter_low, :check_floating_point_number
alias_method :validate_attribute_value_meter_high, :check_floating_point_number
alias_method :validate_attribute_value_meter_max, :check_floating_point_number
alias_method :validate_attribute_value_meter_optimum, :check_floating_point_number
alias_method :validate_attribute_value_progress_value, :check_floating_point_number
alias_method :validate_attribute_value_progress_max, :check_floating_point_number
alias_method :validate_attribute_value_ins_cite, :check_iri
alias_method :validate_attribute_value_ins_datetime, :check_date_time
alias_method :validate_attribute_value_del_cite, :check_iri
alias_method :validate_attribute_value_del_datetime, :check_date_time
|
||||
|
||||
##########################################################################
|
||||
# Whole document validation (IDs, etc.)
|
||||
##########################################################################
|
||||
|
||||
# Whole-document checks run at end of input.
#
# For every token recorded in @things_that_point_to_an_id:
# * a non-empty contextmenu value that is not among the recorded IDs yields
#   "id-does-not-exist";
# * otherwise the first recorded ID-defining token whose id equals the
#   contextmenu value must be a <menu> element, or
#   "contextmenu-must-point-to-menu" is yielded.
def eof
  @things_that_point_to_an_id.each do |token|
    tag_name = token.fetch(:name, "").downcase
    # By now html5parser has "normalized" the attrs list into a dict.
    attrs_dict = token[:data]
    attr_value = attrs_dict.fetch("contextmenu", "")

    # "" is truthy in Ruby, so emptiness has to be tested explicitly.
    if !attr_value.to_s.empty? && !@ids_we_have_known_and_loved.include?(attr_value)
      yield({:type => "ParseError",
             :data => "id-does-not-exist",
             :datavars => {"tagName" => tag_name,
                           "attributeName" => "contextmenu",
                           "attributeValue" => attr_value}})
    else
      @things_that_define_an_id.each do |ref_token|
        id = ref_token.fetch(:data, {}).fetch("id", "")
        next if id.nil? || id.empty?
        next unless id == attr_value

        if ref_token.fetch(:name, "").downcase != "menu"
          yield({:type => "ParseError",
                 :data => "contextmenu-must-point-to-menu"})
        end
        break
      end
    end
  end
end
|
||||
end
|
36
attic/vendor/plugins/HTML5lib/lib/html5/filters/whitespace.rb
vendored
Normal file
36
attic/vendor/plugins/HTML5lib/lib/html5/filters/whitespace.rb
vendored
Normal file
|
@ -0,0 +1,36 @@
|
|||
require 'html5/constants'
|
||||
require 'html5/filters/base'
|
||||
|
||||
module HTML5
  module Filters
    # Token-stream filter that collapses whitespace.
    #
    # Outside of whitespace-preserving elements (pre, textarea and the
    # RCDATA elements) every non-empty :SpaceCharacters token becomes a
    # single space and every run of space characters inside a :Characters
    # token is collapsed to one space. Tokens are modified in place and
    # yielded in their original order.
    class WhitespaceFilter < Base

      SPACE_PRESERVE_ELEMENTS = %w[pre textarea] + RCDATA_ELEMENTS
      SPACES = /[#{SPACE_CHARACTERS.join('')}]+/m

      def each
        # Nesting depth inside a whitespace-preserving element; once inside
        # one, every further start tag increases the depth and every end
        # tag decreases it.
        preserve = 0
        __getobj__.each do |token|
          case token[:type]
          when :StartTag
            if preserve > 0 or SPACE_PRESERVE_ELEMENTS.include?(token[:name])
              preserve += 1
            end

          when :EndTag
            preserve -= 1 if preserve > 0

          when :SpaceCharacters
            # Leave empty data alone so no whitespace is invented.
            if preserve == 0 && token[:data] && !token[:data].empty?
              token[:data] = " "
            end

          when :Characters
            # gsub, not sub: every whitespace run is collapsed, not just the first.
            token[:data] = token[:data].gsub(SPACES, ' ') if preserve == 0
          end

          yield token
        end
      end
    end
  end
end
|
||||
|
248
attic/vendor/plugins/HTML5lib/lib/html5/html5parser.rb
vendored
Normal file
248
attic/vendor/plugins/HTML5lib/lib/html5/html5parser.rb
vendored
Normal file
|
@ -0,0 +1,248 @@
|
|||
require 'html5/constants'
|
||||
require 'html5/tokenizer'
|
||||
require 'html5/treebuilders/rexml'
|
||||
|
||||
Dir.glob(File.join(File.dirname(__FILE__), 'html5parser', '*_phase.rb')).each do |path|
|
||||
require 'html5/html5parser/' + File.basename(path)
|
||||
end
|
||||
|
||||
module HTML5

  # Error in parsed document. Note that this derives from Exception, not
  # StandardError, so a bare +rescue+ clause does not catch it.
  class ParseError < Exception; end
  class AssertionError < Exception; end

  # HTML parser. Generates a tree structure from a stream of (possibly malformed) HTML
  #
  class HTMLParser

    attr_accessor :phase, :first_start_tag, :inner_html, :last_phase, :insert_from_table

    attr_reader :phases, :tokenizer, :tree, :errors

    # Parses +stream+ as a complete document with a parser built from
    # +options+ (see #initialize). The :encoding option is passed to #parse.
    # The caller's +options+ hash is not modified.
    def self.parse(stream, options = {})
      options = options.dup
      encoding = options.delete(:encoding)
      new(options).parse(stream, encoding)
    end

    # Parses +stream+ as a fragment with a parser built from +options+.
    # :container (default 'div') and :encoding are passed to #parse_fragment.
    # The caller's +options+ hash is not modified.
    def self.parse_fragment(stream, options = {})
      options = options.dup
      container = options.delete(:container) || 'div'
      encoding = options.delete(:encoding)
      new(options).parse_fragment(stream, container, encoding)
    end

    @@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption
      inColumnGroup inTableBody inRow inCell inSelect afterBody inFrameset afterFrameset trailingEnd )

    # :strict - raise an exception when a parse error is encountered
    # :tree - a treebuilder class controlling the type of tree that will be
    # returned. Built in treebuilders can be accessed through
    # HTML5::TreeBuilders[treeType]
    #
    # Every option is stored in the instance variable of the same name, so
    # :tokenizer, :lowercase_attr_name and :lowercase_element_name can be
    # supplied the same way.
    def initialize(options = {})
      @strict = false
      @errors = []

      @tokenizer = HTMLTokenizer
      @tree = TreeBuilders::REXML::TreeBuilder

      options.each { |name, value| instance_variable_set("@#{name}", value) }
      @lowercase_attr_name = nil unless instance_variable_defined?("@lowercase_attr_name")
      @lowercase_element_name = nil unless instance_variable_defined?("@lowercase_element_name")

      @tree = @tree.new

      # One phase object per name: 'inBody' => HTML5::InBodyPhase, keyed :inBody.
      phases = {}
      @@phases.each do |phase_name|
        phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase'
        phases[phase_name.to_sym] = HTML5.const_get(phase_class_name).new(self, @tree)
      end
      @phases = phases
    end

    # Shared driver for #parse and #parse_fragment: resets state, creates a
    # fresh tokenizer for +stream+, selects the starting phase and feeds
    # every normalized token to the current phase. +inner_html+ selects
    # fragment mode, in which +container+ (nil means 'div') determines the
    # tokenizer's content model.
    def _parse(stream, inner_html, encoding, container = 'div')
      @tree.reset
      @first_start_tag = false
      @errors = []

      # After a previous run @tokenizer holds an instance; recover its class.
      @tokenizer = @tokenizer.class unless Class === @tokenizer
      @tokenizer = @tokenizer.new(stream,
        :encoding => encoding,
        :parseMeta => !inner_html,
        :lowercase_attr_name => @lowercase_attr_name,
        :lowercase_element_name => @lowercase_element_name)

      if inner_html
        @inner_html = (container || 'div').downcase
        @tokenizer.content_model_flag =
          case @inner_html
          when 'title', 'textarea'
            :RCDATA
          when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'
            :CDATA
          when 'plaintext'
            :PLAINTEXT
          else
            :PCDATA
          end

        @phase = @phases[:rootElement]
        @phase.insert_html_element
        reset_insertion_mode
      else
        @inner_html = false
        @phase = @phases[:initial]
      end

      # We only seem to have InBodyPhase testcases where the following is
      # relevant ... need others too
      @last_phase = nil

      # XXX This is temporary for the moment so there isn't any other
      # changes needed for the parser to work with the iterable tokenizer
      @tokenizer.each do |token|
        token = normalize_token(token)
        handler = "process#{token[:type]}"

        case token[:type]
        when :Characters, :SpaceCharacters, :Comment
          @phase.send handler, token[:data]
        when :StartTag
          @phase.send handler, token[:name], token[:data]
        when :EndTag
          @phase.send handler, token[:name]
        when :Doctype
          @phase.send handler, token[:name], token[:publicId],
            token[:systemId], token[:correct]
        else
          # Any other token type is reported as a parse error.
          parse_error(token[:data], token[:datavars])
        end
      end

      # When the loop finishes it's EOF
      @phase.process_eof
    end

    # Parse a HTML document into a well-formed tree
    #
    # stream - a filelike object or string containing the HTML to be parsed
    #
    # The optional encoding parameter must be a string that indicates
    # the encoding. If specified, that encoding will be used,
    # regardless of any BOM or later declaration (such as in a meta
    # element)
    def parse(stream, encoding=nil)
      _parse(stream, false, encoding)
      @tree.get_document
    end

    # Parse a HTML fragment into a well-formed tree fragment
    #
    # container - name of the element we're setting the inner_html property
    # if set to nil, default to 'div'
    #
    # stream - a filelike object or string containing the HTML to be parsed
    #
    # The optional encoding parameter must be a string that indicates
    # the encoding. If specified, that encoding will be used,
    # regardless of any BOM or later declaration (such as in a meta
    # element)
    def parse_fragment(stream, container='div', encoding=nil)
      _parse(stream, true, encoding, container)
      @tree.get_fragment
    end

    # Records a parse error as [stream position, code, data] in #errors and,
    # in strict mode, raises ParseError with the error code as its message.
    def parse_error(code = 'XXX-undefined-error', data = {})
      # XXX The idea is to make data mandatory.
      @errors.push([@tokenizer.stream.position, code, data])
      raise ParseError, code if @strict
    end

    # HTML5 specific normalizations to the token stream
    def normalize_token(token)

      if token[:type] == :EmptyTag
        # A self-closing tag is only acceptable on a void element; on any
        # other element it is a parse error. Either way it is then handled
        # as an ordinary start tag.
        unless VOID_ELEMENTS.include?(token[:name])
          parse_error("incorrectly-placed-solidus")
        end

        token[:type] = :StartTag
      end

      case token[:type]
      when :StartTag
        token[:name] = token[:name].downcase

        # We need to remove the duplicate attributes and convert attributes
        # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
        # (the list is reversed so that the first occurrence wins).
        unless token[:data].empty?
          pairs = token[:data].reverse.map { |attr, value| [attr.downcase, value] }
          token[:data] = Hash[*pairs.flatten]
        end

      when :EndTag
        parse_error("attributes-in-end-tag") unless token[:data].empty?
        token[:name] = token[:name].downcase
      end

      token
    end

    @@new_modes = {
      'select' => :inSelect,
      'td' => :inCell,
      'th' => :inCell,
      'tr' => :inRow,
      'tbody' => :inTableBody,
      'thead' => :inTableBody,
      'tfoot' => :inTableBody,
      'caption' => :inCaption,
      'colgroup' => :inColumnGroup,
      'table' => :inTable,
      'head' => :inBody,
      'body' => :inBody,
      'frameset' => :inFrameset
    }

    # Chooses the current phase from the stack of open elements, walking
    # from the innermost element outwards and stopping at the first element
    # that determines a phase.
    def reset_insertion_mode
      # The name of this method is mostly historical. (It's also used in the
      # specification.)
      last = false

      @tree.open_elements.reverse.each do |node|
        node_name = node.name

        if node == @tree.open_elements.first
          last = true
          unless ['td', 'th'].include?(node_name)
            # XXX
            # assert @inner_html
            node_name = @inner_html
          end
        end

        # XXX 'select', 'colgroup', 'head' and 'frameset' should only be
        # seen here in the inner_html case (assert @inner_html).

        if @@new_modes.has_key?(node_name)
          @phase = @phases[@@new_modes[node_name]]
        elsif node_name == 'html'
          @phase = @phases[@tree.head_pointer.nil? ? :beforeHead : :afterHead]
        elsif last
          @phase = @phases[:inBody]
        else
          next
        end

        break
      end
    end

    # Identity function: returns +string+ unchanged.
    def _(string); string; end
  end

end
|
46
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/after_body_phase.rb
vendored
Normal file
46
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/after_body_phase.rb
vendored
Normal file
|
@ -0,0 +1,46 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Parser phase used after the body has been closed. Comments are attached
  # to the root element; an </html> end tag moves on to the trailing-end
  # phase; any other content is a parse error and is reprocessed by the
  # in-body phase.
  class AfterBodyPhase < Phase

    handle_end 'html'

    def processComment(data)
      # This is needed because data is to be appended to the <html> element
      # here and not to whatever is currently open.
      root = @tree.open_elements.first
      @tree.insert_comment(data, root)
    end

    def processCharacters(data)
      parse_error("unexpected-char-after-body")
      body_phase = @parser.phases[:inBody]
      @parser.phase = body_phase
      body_phase.processCharacters(data)
    end

    def processStartTag(name, attributes)
      parse_error("unexpected-start-tag-after-body", {"name" => name})
      body_phase = @parser.phases[:inBody]
      @parser.phase = body_phase
      body_phase.processStartTag(name, attributes)
    end

    def endTagHtml(name)
      return parse_error("end-html-in-innerhtml") if @parser.inner_html

      # XXX: This may need to be done, not sure
      # Don't set last_phase to the current phase but to the inBody phase
      # instead. No need for extra parse errors if there's something after </html>.
      # Try "<!doctype html>X</html>X" for instance.
      @parser.last_phase = @parser.phase
      @parser.phase = @parser.phases[:trailingEnd]
    end

    def endTagOther(name)
      parse_error("unexpected-end-tag-after-body", {"name" => name})
      body_phase = @parser.phases[:inBody]
      @parser.phase = body_phase
      body_phase.processEndTag(name)
    end

  end
end
|
33
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/after_frameset_phase.rb
vendored
Normal file
33
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/after_frameset_phase.rb
vendored
Normal file
|
@ -0,0 +1,33 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Parser phase used after a frameset has been closed. Only <noframes>
  # (handed to the in-body phase) and </html> (switch to the trailing-end
  # phase) have handlers defined here; characters and other start/end tags
  # are parse errors and are dropped.
  class AfterFramesetPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#after3

    handle_start 'html', 'noframes'

    handle_end 'html'

    def processCharacters(data)
      parse_error("unexpected-char-after-frameset")
    end

    def startTagNoframes(name, attributes)
      # Processed by the in-body phase without switching to it.
      body_phase = @parser.phases[:inBody]
      body_phase.processStartTag(name, attributes)
    end

    def startTagOther(name, attributes)
      parse_error("unexpected-start-tag-after-frameset", {"name" => name})
    end

    def endTagHtml(name)
      @parser.last_phase = @parser.phase
      @parser.phase = @parser.phases[:trailingEnd]
    end

    def endTagOther(name)
      parse_error("unexpected-end-tag-after-frameset", {"name" => name})
    end
  end
end
|
50
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/after_head_phase.rb
vendored
Normal file
50
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/after_head_phase.rb
vendored
Normal file
|
@ -0,0 +1,50 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Parser phase used after the head. <body> and <frameset> start tags open
  # the respective element and phase; head-only elements are a parse error
  # and are handed back to the in-head phase; everything else implies a
  # <body> (see #anything_else) and is reprocessed by the phase that is then
  # current.
  class AfterHeadPhase < Phase

    handle_start 'html', 'body', 'frameset', %w( base link meta script style title ) => 'FromHead'

    def process_eof
      anything_else
      @parser.phase.process_eof
    end

    def processCharacters(data)
      anything_else
      @parser.phase.processCharacters(data)
    end

    def startTagBody(name, attributes)
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inBody]
    end

    def startTagFrameset(name, attributes)
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inFrameset]
    end

    def startTagFromHead(name, attributes)
      parse_error("unexpected-start-tag-out-of-my-head", {"name" => name})
      head_phase = @parser.phases[:inHead]
      @parser.phase = head_phase
      head_phase.processStartTag(name, attributes)
    end

    def startTagOther(name, attributes)
      anything_else
      @parser.phase.processStartTag(name, attributes)
    end

    def processEndTag(name)
      anything_else
      @parser.phase.processEndTag(name)
    end

    # Inserts an implied <body> element and switches to the in-body phase.
    def anything_else
      @tree.insert_element('body', {})
      @parser.phase = @parser.phases[:inBody]
    end

  end
end
|
41
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/before_head_phase.rb
vendored
Normal file
41
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/before_head_phase.rb
vendored
Normal file
|
@ -0,0 +1,41 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Parser phase used before the head. A <head> start tag opens the head;
  # EOF, characters, other start tags and the end tags html/head/body/br/p
  # imply an empty <head> first and are then reprocessed by the phase that
  # is current afterwards. Any other end tag is a parse error.
  class BeforeHeadPhase < Phase

    handle_start 'html', 'head'

    handle_end %w( html head body br p ) => 'ImplyHead'

    def process_eof
      startTagHead('head', {})
      @parser.phase.process_eof
    end

    def processCharacters(data)
      startTagHead('head', {})
      @parser.phase.processCharacters(data)
    end

    # Inserts the <head> element, remembers it as the tree's head pointer
    # and switches to the in-head phase.
    def startTagHead(name, attributes)
      @tree.insert_element(name, attributes)
      @tree.head_pointer = @tree.open_elements[-1]
      @parser.phase = @parser.phases[:inHead]
    end

    def startTagOther(name, attributes)
      startTagHead('head', {})
      @parser.phase.processStartTag(name, attributes)
    end

    def endTagImplyHead(name)
      startTagHead('head', {})
      @parser.phase.processEndTag(name)
    end

    def endTagOther(name)
      parse_error("end-tag-after-implied-root", {"name" => name})
    end

  end
end
|
609
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb
vendored
Normal file
609
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb
vendored
Normal file
|
@ -0,0 +1,609 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
|
||||
class InBodyPhase < Phase
|
||||
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-body

# Start-tag dispatch table. A `tags => 'Suffix'` pair routes the tags to
# startTag<Suffix> (e.g. startTagListItem below); tags given without a
# suffix are presumably routed to a handler named after the tag(s).
handle_start 'html'
handle_start %w[base link meta script style] => 'ProcessInHead'
handle_start 'title'

handle_start 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'

handle_start 'input', 'textarea', 'select', 'isindex', %w[marquee object]

handle_start %w[li dd dt] => 'ListItem'

handle_start %w[address blockquote center dir div dl fieldset listing menu ol p pre ul] => 'CloseP'

handle_start %w[b big em font i s small strike strong tt u] => 'Formatting'
handle_start 'nobr'

handle_start %w[area basefont bgsound br embed img param spacer wbr] => 'VoidFormatting'

handle_start %w[iframe noembed noframes noscript] => 'Cdata', HEADING_ELEMENTS => 'Heading'

handle_start %w[caption col colgroup frame frameset head option optgroup tbody td tfoot th thead tr] => 'Misplaced'

handle_start %w[event-source section nav article aside header footer datagrid command] => 'New'

# End-tag dispatch table, same conventions with the endTag prefix.
handle_end 'p', 'body', 'html', 'form', %w[button marquee object], %w[dd dt li] => 'ListItem'

handle_end %w[address blockquote center div dl fieldset listing menu ol pre ul] => 'Block'

handle_end HEADING_ELEMENTS => 'Heading'

handle_end %w[a b big em font i nobr s small strike strong tt u] => 'Formatting'

handle_end %w[head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th] => 'Misplaced'

handle_end 'br'

handle_end %w[area basefont bgsound embed hr image img input isindex param spacer wbr frame] => 'None'

handle_end %w[noframes noscript noembed textarea xmp iframe] => 'CdataTextAreaXmp'

handle_end %w[event-source section nav article aside header footer datagrid command] => 'New'
|
||||
|
||||
# Sets up the phase and keeps a per-instance handle on the regular
# whitespace handler.
def initialize(parser, tree)
  super

  # for special handling of whitespace in <pre>: other methods replace
  # processSpaceCharacters on this object's singleton class and later
  # restore it from this alias.
  class << self
    alias_method :processSpaceCharactersNonPre, :processSpaceCharacters
  end
end
|
||||
|
||||
# One-shot whitespace handler installed at the start of <pre>/<textarea>:
# it reinstates the regular handler, drops a single leading newline when
# the current pre/textarea element is still empty, and inserts the rest.
def processSpaceCharactersDropNewline(data)
  # Put the normal handler back before doing anything else.
  class << self
    begin
      remove_method :processSpaceCharacters
    rescue StandardError
      nil # no singleton override was installed
    end
    alias processSpaceCharacters processSpaceCharactersNonPre
  end

  if data.length > 0 && data[0] == ?\n
    current = @tree.open_elements.last
    if %w[pre textarea].include?(current.name) && !current.hasContent
      data = data[1..-1]
    end
  end

  return if data.length == 0

  @tree.reconstructActiveFormattingElements
  @tree.insertText(data)
end
|
||||
|
||||
# Regular whitespace handling: rebuild the active formatting elements,
# then append the whitespace as text.
def processSpaceCharacters(data)
  @tree.reconstructActiveFormattingElements
  @tree.insertText(data)
end
|
||||
|
||||
# Inserts non-whitespace character data.
#
# XXX The specification asks for the formatting elements to be
# reconstructed for every character; that reportedly does not match real
# world behaviour, so whitespace goes through processSpaceCharacters.
def processCharacters(data)
  @tree.reconstructActiveFormattingElements()
  @tree.insertText(data)
end
|
||||
|
||||
# Tags registered as 'ProcessInHead' are delegated to the :inHead phase
# without changing the current phase.
def startTagProcessInHead(name, attributes)
  head_phase = @parser.phases[:inHead]
  head_phase.processStartTag(name, attributes)
end
|
||||
|
||||
# <title> inside the body is an error, but is still handled by the
# :inHead phase.
def startTagTitle(name, attributes)
  parse_error("unexpected-start-tag-out-of-my-head", {"name" => name})
  head_phase = @parser.phases[:inHead]
  head_phase.processStartTag(name, attributes)
end
|
||||
|
||||
# A second <body> is always an error. When the second open element is the
# existing body, attributes it does not have yet are copied onto it;
# otherwise (inner_html case) the tag is ignored.
def startTagBody(name, attributes)
  parse_error("unexpected-start-tag", {"name" => "body"})

  stack = @tree.open_elements
  if stack.length == 1 || stack[1].name != 'body'
    return assert(@parser.inner_html)
  end

  attributes.each do |attr, value|
    existing = @tree.open_elements[1].attributes
    existing[attr] = value unless existing.has_key?(attr)
  end
end
|
||||
|
||||
# Block-level start tags: close an open <p> in scope, insert the element
# and, for <pre>, arm the one-shot handler that drops a leading newline.
def startTagCloseP(name, attributes)
  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  return unless name == 'pre'

  class << self
    begin
      remove_method :processSpaceCharacters
    rescue StandardError
      nil # nothing to remove on the singleton class
    end
    alias processSpaceCharacters processSpaceCharactersDropNewline
  end
end
|
||||
|
||||
# <form>: rejected with a parse error while a form pointer is set;
# otherwise closes an open <p>, inserts the form and records it as the
# tree's form pointer.
def startTagForm(name, attributes)
  return parse_error("unexpected-start-tag", {"name" => name}) if @tree.formPointer

  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  @tree.formPointer = @tree.open_elements.last
end
|
||||
|
||||
# <li>, <dd>, <dt>: implicitly closes a previous open item of the same
# family, reporting the elements that were left open inside it, then
# inserts the new element.
#
# name       - tag name ('li', 'dd' or 'dt')
# attributes - attribute hash for the new element
def startTagListItem(name, attributes)
  endTagP('p') if in_scope?('p')

  stop_names = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}[name]

  # Iterate over a reversed copy so popping from the live stack is safe.
  @tree.open_elements.reverse.each_with_index do |node, depth|
    if stop_names.include?(node.name)
      popped = (0..depth).collect { @tree.open_elements.pop }
      if depth >= 1
        # The last popped node is the list item itself; closing it
        # implicitly is legal, so only the nodes above it are reported.
        # (Previously the item was included, so the singular message
        # could carry two names.)
        missing = popped[0...-1].collect { |n| n.name }.join(", ")
        parse_error(depth == 1 ? "missing-end-tag" : "missing-end-tags",
          {"name" => missing})
      end
      break
    end

    # Phrasing elements are all non special, non scoping, non
    # formatting elements
    blocking = (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name) &&
      !%w[address div].include?(node.name)
    break if blocking
  end

  # Always insert the new list item element.
  @tree.insert_element(name, attributes)
end
|
||||
|
||||
# <plaintext>: close an open <p>, insert the element and put the
# tokenizer into the :PLAINTEXT content model.
def startTagPlaintext(name, attributes)
  endTagP('p') if in_scope?('p')
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :PLAINTEXT
end
|
||||
|
||||
# Heading start tags: close an open <p> and insert the heading.
def startTagHeading(name, attributes)
  endTagP('p') if in_scope?('p')

  # Optional IE7-compatible behaviour (disabled): close any heading that
  # is still in scope before inserting the new one.
  #
  #   HEADING_ELEMENTS.each do |element|
  #     if in_scope?(element)
  #       parse_error("unexpected-start-tag", {"name" => name})
  #
  #       remove_open_elements_until do |element|
  #         HEADING_ELEMENTS.include?(element.name)
  #       end
  #
  #       break
  #     end
  #   end

  @tree.insert_element(name, attributes)
end
|
||||
|
||||
# <a>: an <a> already in the active formatting list is closed first (with
# a parse error) and purged from both lists if still present; then the new
# anchor is added as a formatting element.
def startTagA(name, attributes)
  stale_anchor = @tree.elementInActiveFormattingElements('a')
  if stale_anchor
    parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "a", "endName" => "a"})
    endTagFormatting('a')

    if @tree.open_elements.include?(stale_anchor)
      @tree.open_elements.delete(stale_anchor)
    end
    if @tree.activeFormattingElements.include?(stale_anchor)
      @tree.activeFormattingElements.delete(stale_anchor)
    end
  end

  @tree.reconstructActiveFormattingElements
  addFormattingElement(name, attributes)
end
|
||||
|
||||
# Formatting start tags (b, i, em, ...): rebuild the active formatting
# elements and register the new element as one of them.
def startTagFormatting(name, attributes)
  @tree.reconstructActiveFormattingElements()
  addFormattingElement(name, attributes)
end
|
||||
|
||||
# <nobr>: a <nobr> already in scope is closed first (parse error), after
# which the formatting elements are reconstructed again.
def startTagNobr(name, attributes)
  @tree.reconstructActiveFormattingElements

  if in_scope?('nobr')
    parse_error("unexpected-start-tag-implies-end-tag",
      {"startName" => "nobr", "endName" => "nobr"})
    processEndTag('nobr')
    # XXX Need tests that trigger the following
    @tree.reconstructActiveFormattingElements
  end

  addFormattingElement(name, attributes)
end
|
||||
|
||||
# <button>: with a button already in scope, that one is closed (parse
# error) and the tag is reprocessed by the current phase; otherwise the
# element is inserted and a Marker is pushed on the formatting list.
def startTagButton(name, attributes)
  unless in_scope?('button')
    @tree.reconstructActiveFormattingElements
    @tree.insert_element(name, attributes)
    return @tree.activeFormattingElements.push(Marker)
  end

  parse_error("unexpected-start-tag-implies-end-tag",
    {"startName" => "button", "endName" => "button"})
  processEndTag('button')
  @parser.phase.processStartTag(name, attributes)
end
|
||||
|
||||
# <marquee>/<object>: insert the element and push a Marker onto the list
# of active formatting elements.
def startTagMarqueeObject(name, attributes)
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, attributes)
  formatting = @tree.activeFormattingElements
  formatting.push(Marker)
end
|
||||
|
||||
# <xmp>: insert the element and switch the tokenizer to :CDATA.
def startTagXmp(name, attributes)
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :CDATA
end
|
||||
|
||||
# <table>: close an open <p> through the end-tag dispatcher, insert the
# table and continue in the :inTable phase.
def startTagTable(name, attributes)
  if in_scope?('p')
    processEndTag('p')
  end
  @tree.insert_element(name, attributes)
  @parser.phase = @parser.phases[:inTable]
end
|
||||
|
||||
# Void elements (br, img, ...): insert the element and immediately take
# it off the stack of open elements.
def startTagVoidFormatting(name, attributes)
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, attributes)
  @tree.open_elements.pop()
end
|
||||
|
||||
# <hr>: close an open <p>, insert the element and pop it again because it
# has no content.
def startTagHr(name, attributes)
  if in_scope?('p')
    endTagP('p')
  end
  @tree.insert_element(name, attributes)
  @tree.open_elements.pop()
end
|
||||
|
||||
# <image> is reported as an error and then handled exactly like <img>
# (no really...).
def startTagImage(name, attributes)
  parse_error("unexpected-start-tag-treated-as",
    {"originalName" => "image", "newName" => "img"})
  processStartTag('img', attributes)
end
|
||||
|
||||
# <input>: insert the void element and pop it off the open-element stack.
def startTagInput(name, attributes)
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, attributes)

  # XXX Not exactly sure what to do when @tree.formPointer is set;
  # associating the element with the form is left unimplemented:
  #   @tree.open_elements[-1].form = @tree.formPointer

  @tree.open_elements.pop()
end
|
||||
|
||||
# <isindex>: deprecated; always reported as a parse error. Unless a form
# pointer is already set, it is expanded into
#   <form><hr><p><label>prompt<input name="isindex" ...></label></p><hr></form>
# by feeding synthetic tokens back through this phase.
#
# name       - tag name (unused beyond dispatch)
# attributes - attribute hash; NOTE: it is modified in place ('name' is
#              set to 'isindex') and then passed on to the <input>.
def startTagIsindex(name, attributes)
  parse_error("deprecated-tag", {"name" => "isindex"})
  return if @tree.formPointer

  processStartTag('form', {})
  processStartTag('hr', {})
  processStartTag('p', {})
  processStartTag('label', {})
  # XXX Localization ...
  processCharacters('This is a searchable index. Insert your search keywords here: ')
  attributes['name'] = 'isindex'
  processStartTag('input', attributes)
  processEndTag('label')
  processEndTag('p')
  processStartTag('hr', {})
  processEndTag('form')
end
|
||||
|
||||
# <textarea>: insert the element, switch the tokenizer to :RCDATA and arm
# the one-shot whitespace handler that drops a leading newline.
def startTagTextarea(name, attributes)
  # XXX Form element pointer checking here as well...
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :RCDATA

  class << self
    begin
      remove_method :processSpaceCharacters
    rescue StandardError
      nil # nothing to remove on the singleton class
    end
    alias processSpaceCharacters processSpaceCharactersDropNewline
  end
end
|
||||
|
||||
# iframe, noembed noframes, noscript(if scripting enabled)
|
||||
# Elements registered as 'Cdata': insert the element and let the
# tokenizer read its content as :CDATA.
def startTagCdata(name, attributes)
  @tree.insert_element(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :CDATA
end
|
||||
|
||||
# <select>: insert the element and continue in the :inSelect phase.
def startTagSelect(name, attributes)
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, attributes)
  select_phase = @parser.phases[:inSelect]
  @parser.phase = select_phase
end
|
||||
|
||||
# Start tags registered as 'Misplaced' (table parts, frames, head,
# option/optgroup) belong to other insertion modes; in the body they are
# reported and otherwise ignored.
def startTagMisplaced(name, attributes)
  error_data = {"name" => name}
  parse_error("unexpected-start-tag-ignored", error_data)
end
|
||||
|
||||
# New HTML5 elements ("event-source", "section", "nav", "article",
# "aside", "header", "footer", "datagrid", "command"): no dedicated
# behaviour is implemented, so they are handled like any other element.
def startTagNew(name, attributes)
  # A warning ($stderr.puts "Warning: Undefined behaviour for start tag
  # #{name}") or NotImplementedError used to be considered here.
  startTagOther(name, attributes)
end
|
||||
|
||||
# Default start-tag handling: rebuild the active formatting elements and
# insert the element.
def startTagOther(name, attributes)
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, attributes)
end
|
||||
|
||||
# </p>: pops elements until no <p> is in scope. When no <p> is in scope
# at all, an empty <p> is created first and then closed by a recursive
# call. A parse error is reported if the current node is not a <p>.
def endTagP(name)
  @tree.generateImpliedEndTags('p') if in_scope?('p')

  if @tree.open_elements.last.name != 'p'
    parse_error("unexpected-end-tag", {"name" => "p"})
  end

  unless in_scope?('p')
    startTagCloseP('p', {})
    return endTagP('p')
  end

  while in_scope?('p')
    @tree.open_elements.pop
  end
end
|
||||
|
||||
# </body>: switches to the :afterBody phase.
#
# When the second open element is not a body (inner_html case) the tag is
# only reported and ignored. When other elements are still open on top of
# the body, a parse error naming the current node is reported first.
#
# XXX Need to take open <p> tags into account here. We shouldn't imply
# </p> but we should not throw a parse error either. Specification is
# likely to be updated.
def endTagBody(name)
  body = @tree.open_elements[1]
  unless body && body.name == 'body'
    # inner_html case. The data key is the string "name", like every
    # other parse_error call in this phase (it used to be the symbol
    # :name here).
    parse_error("unexpected-end-tag", {"name" => 'body'})
    return
  end

  current_name = @tree.open_elements.last.name
  unless current_name == 'body'
    parse_error("expected-one-end-tag-but-got-another",
      {"expectedName" => "body", "gotName" => current_name})
  end

  @parser.phase = @parser.phases[:afterBody]
end
|
||||
|
||||
# </html>: treated as </body>; unless parsing inner_html, the tag is then
# handed to whichever phase is current afterwards.
def endTagHtml(name)
  endTagBody(name)
  return if @parser.inner_html

  @parser.phase.processEndTag(name)
end
|
||||
|
||||
# End tags of block elements: generate implied end tags, report an error
# if the current node is not the named element, then pop up to and
# including it - all only effective while the element is in scope.
def endTagBlock(name)
  @tree.generateImpliedEndTags if in_scope?(name)

  if @tree.open_elements.last.name != name
    parse_error("end-tag-too-early", {"name" => name})
  end

  remove_open_elements_until(name) if in_scope?(name)
end
|
||||
|
||||
# </form>: pops the form only when it is the current node (otherwise a
# parse error is reported) and always clears the tree's form pointer.
def endTagForm(name)
  @tree.generateImpliedEndTags if in_scope?(name)

  if @tree.open_elements.last.name == name
    @tree.open_elements.pop
  else
    parse_error("end-tag-too-early-ignored", {"name" => "form"})
  end

  @tree.formPointer = nil
end
|
||||
|
||||
# </li>, </dd>, </dt>: like endTagBlock, except that the implied end tags
# are generated with the item's own name passed along.
# AT Could merge this with the Block case
def endTagListItem(name)
  if in_scope?(name)
    @tree.generateImpliedEndTags(name)
  end

  if @tree.open_elements.last.name != name
    parse_error("end-tag-too-early", {"name" => name})
  end

  if in_scope?(name)
    remove_open_elements_until(name)
  end
end
|
||||
|
||||
# Heading end tags: any heading in scope may be closed by any heading end
# tag. A parse error is reported when the current node is not the named
# heading.
def endTagHeading(name)
  if HEADING_ELEMENTS.any? { |heading| in_scope?(heading) }
    @tree.generateImpliedEndTags
  end

  if @tree.open_elements.last.name != name
    parse_error("end-tag-too-early", {"name" => name})
  end

  HEADING_ELEMENTS.each do |heading|
    next unless in_scope?(heading)

    # Pop until some heading element has been removed.
    remove_open_elements_until { |node| HEADING_ELEMENTS.include?(node.name) }
    break
  end
end
|
||||
|
||||
# The much-feared adoption agency algorithm
|
||||
# End tag of a formatting element: the "adoption agency algorithm".
# http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
#
# Repeats until the formatting element is either missing/out of scope
# (parse error), not on the open-element stack (removed from the
# formatting list), or can be closed directly because no special/scoping
# element ("furthest block") follows it on the stack. Otherwise the tree
# is restructured so the furthest block's children end up inside a clone
# of the formatting element.
#
# Changes from the previous version: the unused local that captured
# node.parent in step 7.5 is gone, and block parameters no longer reuse
# the name of an outer local (on Ruby 1.8 that rebinds the outer
# variable).
#
# XXX Better parse_error messages appreciated.
def endTagFormatting(name)
  while true
    # Step 1 paragraph 1
    formatting_element = @tree.elementInActiveFormattingElements(name)
    if !formatting_element or
        (@tree.open_elements.include?(formatting_element) && !in_scope?(formatting_element.name))
      parse_error("adoption-agency-1.1", {"name" => name})
      return
    # Step 1 paragraph 2
    elsif not @tree.open_elements.include?(formatting_element)
      parse_error("adoption-agency-1.2", {"name" => name})
      @tree.activeFormattingElements.delete(formatting_element)
      return
    end

    # Step 1 paragraph 3
    if formatting_element != @tree.open_elements.last
      parse_error("adoption-agency-1.3", {"name" => name})
    end

    # Step 2
    # Start of the adoption agency algorithm proper: find the first
    # special/scoping element at or after the formatting element.
    formatting_index = @tree.open_elements.index(formatting_element)
    furthest_block = nil
    @tree.open_elements[formatting_index..-1].each do |candidate|
      if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(candidate.name)
        furthest_block = candidate
        break
      end
    end

    # Step 3
    if furthest_block.nil?
      removed = remove_open_elements_until { |popped| popped == formatting_element }
      @tree.activeFormattingElements.delete(removed)
      return
    end
    common_ancestor = @tree.open_elements[formatting_index - 1]

    # Step 5
    furthest_block.parent.removeChild(furthest_block) if furthest_block.parent

    # Step 6
    # The bookmark is supposed to help us identify where to reinsert
    # nodes in step 12. We have to ensure that we reinsert nodes after
    # the node before the active formatting element. Note the bookmark
    # can move in step 7.4
    bookmark = @tree.activeFormattingElements.index(formatting_element)

    # Step 7
    last_node = node = furthest_block
    while true
      # AT replace this with a function and recursion?
      # Node is element before node in open elements
      node = @tree.open_elements[@tree.open_elements.index(node) - 1]
      until @tree.activeFormattingElements.include?(node)
        discarded = node
        node = @tree.open_elements[@tree.open_elements.index(node) - 1]
        @tree.open_elements.delete(discarded)
      end
      # Step 7.3
      break if node == formatting_element
      # Step 7.4
      if last_node == furthest_block
        # XXX should this be index(node) or index(node)+1
        # Anne: I think +1 is ok. Given x = [2,3,4,5]
        # x.index(3) gives 1 and then x[1 +1] gives 4...
        bookmark = @tree.activeFormattingElements.index(node) + 1
      end
      # Step 7.5
      if node.hasContent
        replacement = node.cloneNode
        # Replace node with its clone in both lists
        @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = replacement
        @tree.open_elements[@tree.open_elements.index(node)] = replacement
        node = replacement
      end
      # Step 7.6
      # Remove last_node from its parent, if any
      last_node.parent.removeChild(last_node) if last_node.parent
      node.appendChild(last_node)
      # Step 7.7
      last_node = node
      # End of inner loop
    end

    # Step 8
    last_node.parent.removeChild(last_node) if last_node.parent
    common_ancestor.appendChild(last_node)

    # Step 9
    clone = formatting_element.cloneNode

    # Step 10
    furthest_block.reparentChildren(clone)

    # Step 11
    furthest_block.appendChild(clone)

    # Step 12
    @tree.activeFormattingElements.delete(formatting_element)
    @tree.activeFormattingElements.insert([bookmark, @tree.activeFormattingElements.length].min, clone)

    # Step 13
    @tree.open_elements.delete(formatting_element)
    @tree.open_elements.insert(@tree.open_elements.index(furthest_block) + 1, clone)
  end
end
|
||||
|
||||
# </button>, </marquee>, </object>: like a block end tag, and the active
# formatting elements are cleared once the element has been popped.
def endTagButtonMarqueeObject(name)
  @tree.generateImpliedEndTags if in_scope?(name)

  if @tree.open_elements.last.name != name
    parse_error("end-tag-too-early", {"name" => name})
  end

  return unless in_scope?(name)

  remove_open_elements_until(name)
  @tree.clearActiveFormattingElements
end
|
||||
|
||||
# End tags that are handled in other insertion modes are only reported
# as errors here.
def endTagMisplaced(name)
  error_data = {"name" => name}
  parse_error("unexpected-end-tag", error_data)
end
|
||||
|
||||
# </br> is an error that is treated as a <br> element: the element is
# inserted and popped again straight away.
def endTagBr(name)
  parse_error("unexpected-end-tag-treated-as",
    {"originalName" => "br", "newName" => "br element"})
  @tree.reconstructActiveFormattingElements()
  @tree.insert_element(name, {})
  @tree.open_elements.pop
end
|
||||
|
||||
# End tags of elements that never take an end tag are only reported.
def endTagNone(name)
  error_data = {"name" => name}
  parse_error("no-end-tag", error_data)
end
|
||||
|
||||
# End tags of the CDATA/RCDATA elements: pop the current node when it
# matches, otherwise report the stray end tag.
def endTagCdataTextAreaXmp(name)
  unless @tree.open_elements.last.name == name
    return parse_error("unexpected-end-tag", {"name" => name})
  end

  @tree.open_elements.pop
end
|
||||
|
||||
# End tags of the new HTML5 elements ("event-source", "section", "nav",
# "article", "aside", "header", "footer", "datagrid", "command"): no
# dedicated behaviour is implemented, so the generic handler is used.
def endTagNew(name)
  # A warning (STDERR.puts "Warning: Undefined behaviour for end tag
  # #{name}") or NotImplementedError used to be considered here.
  endTagOther(name)
end
|
||||
|
||||
# Generic end-tag handling: walk the open elements from the top; the
# first element with a matching name is closed (with everything above
# it), while hitting a special or scoping element first makes the tag a
# reported, ignored error.
# XXX This logic should be moved into the treebuilder
def endTagOther(name)
  # Walk a reversed copy: the live stack is modified inside the loop.
  @tree.open_elements.reverse.each do |node|
    if node.name == name
      @tree.generateImpliedEndTags

      if @tree.open_elements.last.name != name
        parse_error("unexpected-end-tag", {"name" => name})
      end

      remove_open_elements_until { |popped| popped == node }
      break
    elsif (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
      parse_error("unexpected-end-tag", {"name" => name})
      break
    end
  end
end
|
||||
|
||||
protected
|
||||
|
||||
# Inserts the element and appends the freshly opened node to the list of
# active formatting elements.
def addFormattingElement(name, attributes)
  @tree.insert_element(name, attributes)
  opened = @tree.open_elements.last
  @tree.activeFormattingElements.push(opened)
end
|
||||
|
||||
end
|
||||
end
|
69
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_caption_phase.rb
vendored
Normal file
69
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_caption_phase.rb
vendored
Normal file
|
@ -0,0 +1,69 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Parser phase for the content of a table <caption>. Ordinary content is
  # delegated to the :inBody phase; table structure tags close the caption.
  class InCaptionPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-caption

    handle_start 'html', %w(caption col colgroup tbody td tfoot th thead tr) => 'TableElement'

    handle_end 'caption', 'table', %w(body col colgroup html tbody td tfoot th thead tr) => 'Ignore'

    # True when in_scope?('caption', true) finds no caption, i.e. when a
    # </caption> would have to be ignored.
    def ignoreEndTagCaption
      !in_scope?('caption', true)
    end

    # Character data is handled by the :inBody phase.
    def processCharacters(data)
      body_phase = @parser.phases[:inBody]
      body_phase.processCharacters(data)
    end

    # Table structure start tags: report the error, act as if </caption>
    # had been seen, then reprocess the tag unless that end tag was ignored.
    def startTagTableElement(name, attributes)
      parse_error "unexpected-end-tag", {"name" => name}
      # XXX Have to duplicate logic here to find out if the tag is ignored
      caption_ignored = ignoreEndTagCaption
      @parser.phase.processEndTag('caption')
      return if caption_ignored

      @parser.phase.processStartTag(name, attributes)
    end

    # Every other start tag is handled by the :inBody phase.
    def startTagOther(name, attributes)
      body_phase = @parser.phases[:inBody]
      body_phase.processStartTag(name, attributes)
    end

    # </caption>: pop up to the caption, clear the active formatting
    # elements and return to the :inTable phase.
    def endTagCaption(name)
      if ignoreEndTagCaption
        # inner_html case
        assert @parser.inner_html
        return parse_error("unexpected-end-tag", {"name" => name})
      end

      # AT this code is quite similar to endTagTable in "InTable"
      @tree.generateImpliedEndTags

      current_name = @tree.open_elements.last.name
      if current_name != 'caption'
        parse_error("expected-one-end-tag-but-got-another",
          {"gotName" => "caption",
           "expectedName" => @tree.open_elements.last.name})
      end

      remove_open_elements_until('caption')

      @tree.clearActiveFormattingElements
      @parser.phase = @parser.phases[:inTable]
    end

    # </table>: report the error, act as if </caption> had been seen, then
    # reprocess the tag unless that end tag was ignored.
    def endTagTable(name)
      parse_error "unexpected-end-table-in-caption"
      caption_ignored = ignoreEndTagCaption
      @parser.phase.processEndTag('caption')
      return if caption_ignored

      @parser.phase.processEndTag(name)
    end

    # End tags registered as 'Ignore' are only reported.
    def endTagIgnore(name)
      parse_error("unexpected-end-tag", {"name" => name})
    end

    # Every other end tag is handled by the :inBody phase.
    def endTagOther(name)
      body_phase = @parser.phases[:inBody]
      body_phase.processEndTag(name)
    end
  end
end
|
78
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_cell_phase.rb
vendored
Normal file
78
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_cell_phase.rb
vendored
Normal file
|
@ -0,0 +1,78 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Parser phase for the content of a table cell (<td>/<th>). Ordinary
  # content is delegated to the :inBody phase; table structure tags close
  # the cell first.
  class InCellPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-cell

    handle_start 'html', %w( caption col colgroup tbody td tfoot th thead tr ) => 'TableOther'

    handle_end %w( td th ) => 'TableCell', %w( body caption col colgroup html ) => 'Ignore'

    handle_end %w( table tbody tfoot thead tr ) => 'Imply'

    # Character data is handled by the :inBody phase.
    def processCharacters(data)
      @parser.phases[:inBody].processCharacters(data)
    end

    # Table structure start tags: close the open cell and reprocess the
    # tag; without a cell in scope (inner_html case) only report an error.
    def startTagTableOther(name, attributes)
      unless in_scope?('td', true) || in_scope?('th', true)
        # inner_html case
        return parse_error
      end

      closeCell
      @parser.phase.processStartTag(name, attributes)
    end

    # Every other start tag is handled by the :inBody phase.
    def startTagOther(name, attributes)
      @parser.phases[:inBody].processStartTag(name, attributes)
    end

    # </td>, </th>: close the cell, clear the active formatting elements
    # and return to the :inRow phase. Without a matching cell in scope the
    # tag is only reported.
    def endTagTableCell(name)
      unless in_scope?(name, true)
        return parse_error("unexpected-end-tag", {"name" => name})
      end

      @tree.generateImpliedEndTags(name)
      if @tree.open_elements.last.name == name
        @tree.open_elements.pop
      else
        parse_error("unexpected-cell-end-tag", {"name" => name})
        remove_open_elements_until(name)
      end
      @tree.clearActiveFormattingElements
      @parser.phase = @parser.phases[:inRow]
    end

    # End tags registered as 'Ignore' are only reported.
    def endTagIgnore(name)
      parse_error("unexpected-end-tag", {"name" => name})
    end

    # </table>, </tbody>, </tfoot>, </thead>, </tr>: close the cell and
    # reprocess the tag when the element is in scope.
    def endTagImply(name)
      unless in_scope?(name, true)
        # sometimes inner_html case. The data key is the string "name",
        # like every other parse_error call in this class (it used to be
        # the symbol :name here).
        return parse_error("unexpected-end-tag", {"name" => name})
      end

      closeCell
      @parser.phase.processEndTag(name)
    end

    # Every other end tag is handled by the :inBody phase.
    def endTagOther(name)
      @parser.phases[:inBody].processEndTag(name)
    end

    protected

    # Closes whichever cell is in scope, preferring <td> over <th>.
    def closeCell
      if in_scope?('td', true)
        endTagTableCell('td')
      elsif in_scope?('th', true)
        endTagTableCell('th')
      end
    end

  end
end
|
55
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_column_group_phase.rb
vendored
Normal file
55
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_column_group_phase.rb
vendored
Normal file
|
@ -0,0 +1,55 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Parser phase for the content of a <colgroup>: only <col> is accepted;
  # anything else closes the column group and is reprocessed.
  class InColumnGroupPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-column

    handle_start 'html', 'col'

    handle_end 'colgroup', 'col'

    # True when the current node is the html element, in which case a
    # </colgroup> has nothing to close and is ignored.
    def ignoreEndTagColgroup
      @tree.open_elements.last.name == 'html'
    end

    # Character data: close the column group, then reprocess the data
    # unless the implied end tag was ignored.
    def processCharacters(data)
      colgroup_ignored = ignoreEndTagColgroup
      endTagColgroup("colgroup")
      @parser.phase.processCharacters(data) unless colgroup_ignored
    end

    # <col>: insert the void element and pop it again.
    def startTagCol(name, attributes)
      @tree.insert_element(name, attributes)
      @tree.open_elements.pop
    end

    # Other start tags: close the column group, then reprocess the tag
    # unless the implied end tag was ignored.
    def startTagOther(name, attributes)
      colgroup_ignored = ignoreEndTagColgroup
      endTagColgroup('colgroup')
      @parser.phase.processStartTag(name, attributes) unless colgroup_ignored
    end

    # </colgroup>: pop the column group and return to the :inTable phase.
    def endTagColgroup(name)
      if ignoreEndTagColgroup
        # inner_html case. The data key is the string "name", matching
        # endTagCol below (it used to be the symbol :name here).
        assert @parser.inner_html
        parse_error("unexpected-end-tag", {"name" => name})
      else
        @tree.open_elements.pop
        @parser.phase = @parser.phases[:inTable]
      end
    end

    # </col> is always an error: col has no end tag.
    def endTagCol(name)
      parse_error("no-end-tag", {"name" => "col"})
    end

    # Other end tags: close the column group, then reprocess the tag
    # unless the implied end tag was ignored.
    def endTagOther(name)
      colgroup_ignored = ignoreEndTagColgroup
      endTagColgroup('colgroup')
      @parser.phase.processEndTag(name) unless colgroup_ignored
    end

  end
end
|
56
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_frameset_phase.rb
vendored
Normal file
56
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_frameset_phase.rb
vendored
Normal file
|
@ -0,0 +1,56 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Parser phase for the content of a <frameset>: only frameset, frame and
  # noframes are accepted; everything else is reported as an error.
  class InFramesetPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset

    handle_start 'html', 'frameset', 'frame', 'noframes'

    handle_end 'frameset', 'noframes'

    # Character data is not allowed inside a frameset.
    def processCharacters(data)
      parse_error("unexpected-char-in-frameset")
    end

    # Nested <frameset>: simply insert it.
    def startTagFrameset(name, attributes)
      @tree.insert_element(name, attributes)
    end

    # <frame>: insert the void element and pop it again.
    def startTagFrame(name, attributes)
      @tree.insert_element(name, attributes)
      @tree.open_elements.pop()
    end

    # <noframes> is handled by the :inBody phase.
    def startTagNoframes(name, attributes)
      body_phase = @parser.phases[:inBody]
      body_phase.processStartTag(name, attributes)
    end

    # Any other start tag is an error.
    def startTagOther(name, attributes)
      parse_error("unexpected-start-tag-in-frameset", {"name" => name})
    end

    # </frameset>: pop the frameset (an error when the current node is the
    # html element) and move on to :afterFrameset once the current node is
    # no longer a frameset.
    def endTagFrameset(name)
      if @tree.open_elements.last.name == 'html'
        # inner_html case
        parse_error("unexpected-frameset-in-frameset-innerhtml")
      else
        @tree.open_elements.pop
      end

      # If we're not in inner_html mode and the current node is not a
      # "frameset" element (anymore) then switch.
      if !@parser.inner_html && @tree.open_elements.last.name != 'frameset'
        @parser.phase = @parser.phases[:afterFrameset]
      end
    end

    # </noframes> is handled by the :inBody phase.
    def endTagNoframes(name)
      body_phase = @parser.phases[:inBody]
      body_phase.processEndTag(name)
    end

    # Any other end tag is an error.
    def endTagOther(name)
      parse_error("unexpected-end-tag-in-frameset", {"name" => name})
    end
  end
end
|
138
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_head_phase.rb
vendored
Normal file
138
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_head_phase.rb
vendored
Normal file
|
@ -0,0 +1,138 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
|
||||
class InHeadPhase < Phase
|
||||
|
||||
# Start tags with dedicated handlers in this phase.
handle_start 'html', 'head', 'title', 'style', 'script', 'noscript'
handle_start %w[base link meta]

# End tags with dedicated handlers; the html/body/br/p group is routed to
# the 'ImplyAfterHead' handler.
handle_end 'head'
handle_end %w[html body br p] => 'ImplyAfterHead'
handle_end %w[title style script noscript]
|
||||
|
||||
# End of input inside the head: an unclosed <title>, <style> or <script>
# is reported and popped, then anything_else runs and the EOF is handed to
# the phase that is current afterwards.
def process_eof
  # Read the current node's name once; the previous version assigned it to
  # a local inside the condition and then never used that local.
  current_name = @tree.open_elements.last.name
  if ['title', 'style', 'script'].include?(current_name)
    parse_error("expected-named-closing-tag-but-got-eof", {"name" => current_name})
    @tree.open_elements.pop
  end
  anything_else
  @parser.phase.process_eof
end
|
||||
|
||||
# Character data: appended as text while the current node is a title,
# style, script or noscript element; otherwise anything_else runs and the
# data is handed to the phase that is current afterwards.
def processCharacters(data)
  text_containers = %w[title style script noscript]
  if text_containers.include?(@tree.open_elements.last.name)
    return @tree.insertText(data)
  end

  anything_else
  @parser.phase.processCharacters(data)
end
|
||||
|
||||
# A second <head> is reported and otherwise ignored.
def startTagHead(name, attributes)
  parse_error "two-heads-are-not-better-than-one"
end
|
||||
|
||||
# <title>: create the element, attach it via appendToHead, open it and
# switch the tokenizer to :RCDATA for its content.
def startTagTitle(name, attributes)
  title = @tree.createElement(name, attributes)
  appendToHead(title)
  @tree.open_elements.push(title)
  tokenizer = @parser.tokenizer
  tokenizer.content_model_flag = :RCDATA
end
|
||||
|
||||
def startTagStyle(name, attributes)
|
||||
element = @tree.createElement(name, attributes)
|
||||
if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead]
|
||||
appendToHead(element)
|
||||
else
|
||||
@tree.open_elements.last.appendChild(element)
|
||||
end
|
||||
@tree.open_elements.push(element)
|
||||
@parser.tokenizer.content_model_flag = :CDATA
|
||||
end
|
||||
|
||||
def startTagNoscript(name, attributes)
|
||||
# XXX Need to decide whether to implement the scripting disabled case.
|
||||
element = @tree.createElement(name, attributes)
|
||||
if @tree.head_pointer !=nil and @parser.phase == @parser.phases[:inHead]
|
||||
appendToHead(element)
|
||||
else
|
||||
@tree.open_elements.last.appendChild(element)
|
||||
end
|
||||
@tree.open_elements.push(element)
|
||||
@parser.tokenizer.content_model_flag = :CDATA
|
||||
end
|
||||
|
||||
def startTagScript(name, attributes)
|
||||
#XXX Inner HTML case may be wrong
|
||||
element = @tree.createElement(name, attributes)
|
||||
element._flags.push("parser-inserted")
|
||||
if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead]
|
||||
appendToHead(element)
|
||||
else
|
||||
@tree.open_elements.last.appendChild(element)
|
||||
end
|
||||
@tree.open_elements.push(element)
|
||||
@parser.tokenizer.content_model_flag = :CDATA
|
||||
end
|
||||
|
||||
def startTagBaseLinkMeta(name, attributes)
|
||||
element = @tree.createElement(name, attributes)
|
||||
if @tree.head_pointer != nil and @parser.phase == @parser.phases[:inHead]
|
||||
appendToHead(element)
|
||||
else
|
||||
@tree.open_elements.last.appendChild(element)
|
||||
end
|
||||
end
|
||||
|
||||
def startTagOther(name, attributes)
|
||||
anything_else
|
||||
@parser.phase.processStartTag(name, attributes)
|
||||
end
|
||||
|
||||
def endTagHead(name)
|
||||
if @tree.open_elements.last.name == 'head'
|
||||
@tree.open_elements.pop
|
||||
else
|
||||
parse_error("unexpected-end-tag", {"name" => "head"})
|
||||
end
|
||||
@parser.phase = @parser.phases[:afterHead]
|
||||
end
|
||||
|
||||
def endTagImplyAfterHead(name)
|
||||
anything_else
|
||||
@parser.phase.processEndTag(name)
|
||||
end
|
||||
|
||||
def endTagTitleStyleScriptNoscript(name)
|
||||
if @tree.open_elements.last.name == name
|
||||
@tree.open_elements.pop
|
||||
else
|
||||
parse_error("unexpected-end-tag", {"name" => name})
|
||||
end
|
||||
end
|
||||
|
||||
def endTagOther(name)
|
||||
parse_error("unexpected-end-tag", {"name" => name})
|
||||
end
|
||||
|
||||
def anything_else
|
||||
if @tree.open_elements.last.name == 'head'
|
||||
endTagHead('head')
|
||||
else
|
||||
@parser.phase = @parser.phases[:afterHead]
|
||||
end
|
||||
end
|
||||
|
||||
protected
|
||||
|
||||
def appendToHead(element)
|
||||
if @tree.head_pointer.nil?
|
||||
assert @parser.inner_html
|
||||
@tree.open_elements.last.appendChild(element)
|
||||
else
|
||||
@tree.head_pointer.appendChild(element)
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
end
|
88
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_row_phase.rb
vendored
Normal file
88
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_row_phase.rb
vendored
Normal file
|
@ -0,0 +1,88 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Tree-construction phase for tokens seen inside a table row (<tr>).
  class InRowPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-row

    handle_start 'html',
                 %w( td th ) => 'TableCell',
                 %w( caption col colgroup tbody tfoot thead tr ) => 'TableOther'

    handle_end 'tr', 'table',
               %w( tbody tfoot thead ) => 'TableRowGroup',
               %w( body caption col colgroup html td th ) => 'Ignore'

    # Character data is handled exactly as in the "in table" phase.
    def processCharacters(data)
      in_table = @parser.phases[:inTable]
      in_table.processCharacters(data)
    end

    # Opens a cell: clears back to the row, inserts the element, enters the
    # "in cell" phase and pushes a formatting marker.
    def startTagTableCell(name, attributes)
      clearStackToTableRowContext
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inCell]
      @tree.activeFormattingElements.push(Marker)
    end

    # Acts as if </tr> had been seen, then reprocesses the start tag unless
    # that implied end tag was ignored.
    def startTagTableOther(name, attributes)
      row_end_ignored = ignoreEndTagTr
      endTagTr('tr')
      # XXX how are we sure it's always ignored in the inner_html case?
      return if row_end_ignored

      @parser.phase.processStartTag(name, attributes)
    end

    def startTagOther(name, attributes)
      in_table = @parser.phases[:inTable]
      in_table.processStartTag(name, attributes)
    end

    # Closes the row and returns to the "in table body" phase. When no <tr>
    # is in table scope the tag is ignored with a parse error.
    def endTagTr(name)
      if ignoreEndTagTr
        # inner_html case
        assert @parser.inner_html
        return parse_error("unexpected-end-tag", {:name => name})
      end

      clearStackToTableRowContext
      @tree.open_elements.pop
      @parser.phase = @parser.phases[:inTableBody]
    end

    def endTagTable(name)
      row_end_ignored = ignoreEndTagTr
      endTagTr('tr')
      # Reprocess the current tag if the tr end tag was not ignored
      # XXX how are we sure it's always ignored in the inner_html case?
      return if row_end_ignored

      @parser.phase.processEndTag(name)
    end

    # </tbody>, </tfoot> and </thead> imply </tr> when the named element is
    # in table scope; otherwise they are reported and dropped.
    def endTagTableRowGroup(name)
      unless in_scope?(name, true)
        # inner_html case
        return parse_error("unexpected-end-tag", {:name => name})
      end

      endTagTr('tr')
      @parser.phase.processEndTag(name)
    end

    def endTagIgnore(name)
      details = {"name" => name}
      parse_error("unexpected-end-tag-in-table-row", details)
    end

    def endTagOther(name)
      in_table = @parser.phases[:inTable]
      in_table.processEndTag(name)
    end

    protected

    # XXX unify this with other table helper methods
    # Pops open elements, reporting each one, until the current node is a
    # <tr> or the root <html>.
    def clearStackToTableRowContext
      stack = @tree.open_elements
      until %w[tr html].include?(stack.last.name)
        parse_error("unexpected-implied-end-tag-in-table-row", {"name" => stack.last.name})
        stack.pop
      end
    end

    # True when a </tr> must be ignored because no <tr> is in table scope.
    def ignoreEndTagTr
      !in_scope?('tr', :tableVariant => true)
    end

  end
end
|
85
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb
vendored
Normal file
85
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb
vendored
Normal file
|
@ -0,0 +1,85 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Tree-construction phase for tokens seen inside a <select> element.
  class InSelectPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-select

    handle_start 'html', 'option', 'optgroup', 'select'

    handle_end 'option', 'optgroup', 'select',
               %w( caption table tbody tfoot thead tr td th ) => 'TableElements'

    def processCharacters(data)
      @tree.insertText(data)
    end

    def startTagOption(name, attributes)
      # We need to imply </option> if <option> is the current node.
      stack = @tree.open_elements
      stack.pop if stack.last.name == 'option'
      @tree.insert_element(name, attributes)
    end

    # An <optgroup> implies </option> and then </optgroup>, in that order,
    # each only when the element is the current node.
    def startTagOptgroup(name, attributes)
      %w[option optgroup].each do |implied|
        @tree.open_elements.pop if @tree.open_elements.last.name == implied
      end
      @tree.insert_element(name, attributes)
    end

    # A nested <select> is an error that is treated like </select>.
    def startTagSelect(name, attributes)
      parse_error("unexpected-select-in-select")
      endTagSelect('select')
    end

    def startTagOther(name, attributes)
      details = {"name" => name}
      parse_error("unexpected-start-tag-in-select", details)
    end

    def endTagOption(name)
      return @tree.open_elements.pop if @tree.open_elements.last.name == 'option'

      parse_error("unexpected-end-tag-in-select", {"name" => "option"})
    end

    def endTagOptgroup(name)
      stack = @tree.open_elements
      # </optgroup> implicitly closes <option>
      stack.pop if stack.last.name == 'option' && stack[-2].name == 'optgroup'
      # It also closes </optgroup>, but nothing else
      if stack.last.name != 'optgroup'
        parse_error("unexpected-end-tag-in-select", {"name" => "optgroup"})
      else
        stack.pop
      end
    end

    # Pops everything up to and including <select> and lets the parser pick
    # the insertion mode again; with no <select> in table scope only a parse
    # error is reported.
    def endTagSelect(name)
      unless in_scope?('select', true)
        # inner_html case
        return parse_error
      end

      remove_open_elements_until('select')
      @parser.reset_insertion_mode
    end

    # Table-related end tags are always an error here; when the named
    # element is in table scope the select is closed and the tag reprocessed.
    def endTagTableElements(name)
      parse_error("unexpected-end-tag-in-select", {"name" => name})
      return unless in_scope?(name, true)

      endTagSelect('select')
      @parser.phase.processEndTag(name)
    end

    def endTagOther(name)
      details = {"name" => name}
      parse_error("unexpected-end-tag-in-select", details)
    end

  end
end
|
84
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_body_phase.rb
vendored
Normal file
84
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_body_phase.rb
vendored
Normal file
|
@ -0,0 +1,84 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Tree-construction phase for tokens seen inside <tbody>, <thead> or
  # <tfoot>.
  class InTableBodyPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-table0

    handle_start 'html', 'tr',
                 %w( td th ) => 'TableCell',
                 %w( caption col colgroup tbody tfoot thead ) => 'TableOther'

    handle_end 'table',
               %w( tbody tfoot thead ) => 'TableRowGroup',
               %w( body caption col colgroup html td th tr ) => 'Ignore'

    # Character data is handled exactly as in the "in table" phase.
    def processCharacters(data)
      in_table = @parser.phases[:inTable]
      in_table.processCharacters(data)
    end

    def startTagTr(name, attributes)
      clearStackToTableBodyContext
      @tree.insert_element(name, attributes)
      @parser.phase = @parser.phases[:inRow]
    end

    # A cell directly inside a row group is an error; a <tr> is implied and
    # the cell is then reprocessed by the phase that is current afterwards.
    def startTagTableCell(name, attributes)
      parse_error("unexpected-cell-in-table-body", {"name" => name})
      startTagTr('tr', {})
      @parser.phase.processStartTag(name, attributes)
    end

    def startTagTableOther(name, attributes)
      # XXX AT Any ideas on how to share this with endTagTable?
      unless row_group_in_table_scope?
        # inner_html case
        return parse_error("unexpected-start-tag", {:name => name})
      end

      close_current_row_group
      @parser.phase.processStartTag(name, attributes)
    end

    def startTagOther(name, attributes)
      in_table = @parser.phases[:inTable]
      in_table.processStartTag(name, attributes)
    end

    # Closes the named row group when it is in table scope and returns to
    # the "in table" phase; otherwise reports the stray end tag.
    def endTagTableRowGroup(name)
      unless in_scope?(name, true)
        return parse_error("unexpected-end-tag-in-table-body", {"name" => name})
      end

      clearStackToTableBodyContext
      @tree.open_elements.pop
      @parser.phase = @parser.phases[:inTable]
    end

    def endTagTable(name)
      unless row_group_in_table_scope?
        # inner_html case
        return parse_error("unexpected-end-tag", {:name => name})
      end

      close_current_row_group
      @parser.phase.processEndTag(name)
    end

    def endTagIgnore(name)
      details = {"name" => name}
      parse_error("unexpected-end-tag-in-table-body", details)
    end

    def endTagOther(name)
      in_table = @parser.phases[:inTable]
      in_table.processEndTag(name)
    end

    protected

    # Pops open elements, reporting each one, until the current node is a
    # row group or the root <html>.
    def clearStackToTableBodyContext
      stack = @tree.open_elements
      until %w[tbody tfoot thead html].include?(stack.last.name)
        parse_error("unexpected-implied-end-tag-in-table",
            {"name" => stack.last.name})
        stack.pop
      end
    end

    # True when a tbody, thead or tfoot (checked in that order) is in table
    # scope.
    def row_group_in_table_scope?
      %w[tbody thead tfoot].any? {|group| in_scope?(group, true) }
    end

    # Clears back to the open row group and closes it as if its own end tag
    # had been seen.
    def close_current_row_group
      clearStackToTableBodyContext
      endTagTableRowGroup(@tree.open_elements.last.name)
    end

  end
end
|
115
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_phase.rb
vendored
Normal file
115
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_phase.rb
vendored
Normal file
|
@ -0,0 +1,115 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Tree-construction phase for tokens seen directly inside a <table>.
  class InTablePhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-table

    handle_start 'html', 'caption', 'colgroup', 'col', 'table'

    handle_start %w( tbody tfoot thead ) => 'RowGroup',
                 %w( td th tr ) => 'ImplyTbody'

    handle_end 'table',
               %w( body caption col colgroup html tbody td tfoot th thead tr ) => 'Ignore'

    # Text directly inside a table is an error; it is processed by the
    # "in body" phase with insert_from_table switched on.
    def processCharacters(data)
      parse_error("unexpected-char-implies-table-voodoo")
      with_table_voodoo {|in_body| in_body.processCharacters(data) }
    end

    def startTagCaption(name, attributes)
      clearStackToTableContext
      @tree.activeFormattingElements.push(Marker)
      @tree.insert_element(name, attributes)
      enter_phase(:inCaption)
    end

    def startTagColgroup(name, attributes)
      clearStackToTableContext
      @tree.insert_element(name, attributes)
      enter_phase(:inColumnGroup)
    end

    # A bare <col> implies <colgroup>, after which the <col> is reprocessed.
    def startTagCol(name, attributes)
      startTagColgroup('colgroup', {})
      @parser.phase.processStartTag(name, attributes)
    end

    def startTagRowGroup(name, attributes)
      clearStackToTableContext
      @tree.insert_element(name, attributes)
      enter_phase(:inTableBody)
    end

    # A row or cell directly inside <table> implies <tbody>, after which the
    # tag is reprocessed.
    def startTagImplyTbody(name, attributes)
      startTagRowGroup('tbody', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # A nested <table> is treated as </table> followed (outside inner_html
    # mode) by reprocessing the start tag.
    def startTagTable(name, attributes)
      parse_error("unexpected-start-tag-implies-end-tag",
          {"startName" => "table", "endName" => "table"})
      @parser.phase.processEndTag('table')
      return if @parser.inner_html

      @parser.phase.processStartTag(name, attributes)
    end

    def startTagOther(name, attributes)
      parse_error("unexpected-start-tag-implies-table-voodoo",
          {"name" => name})
      with_table_voodoo {|in_body| in_body.processStartTag(name, attributes) }
    end

    # Closes the table when one is in table scope and lets the parser pick
    # the insertion mode again; otherwise reports the stray end tag.
    def endTagTable(name)
      unless in_scope?('table', true)
        # inner_html case
        assert @parser.inner_html
        return parse_error("unexpected-end-tag", {:name => name})
      end

      @tree.generateImpliedEndTags

      current_name = @tree.open_elements.last.name
      if current_name != 'table'
        parse_error("end-tag-too-early-named",
            {"gotName" => "table",
             "expectedName" => current_name})
      end

      remove_open_elements_until('table')

      @parser.reset_insertion_mode
    end

    def endTagIgnore(name)
      details = {"name" => name}
      parse_error("unexpected-end-tag", details)
    end

    def endTagOther(name)
      parse_error("unexpected-end-tag-implies-table-voodoo", {"name" => name})
      with_table_voodoo {|in_body| in_body.processEndTag(name) }
    end

    protected

    # "clear the stack back to a table context"
    def clearStackToTableContext
      stack = @tree.open_elements
      until %w[table html].include?(stack.last.name)
        parse_error("unexpected-implied-end-tag-in-table",
            {"name" => stack.last.name})
        stack.pop
      end
      # When the current node is <html> it's an inner_html case
    end

    # Switches the parser to the phase registered under +key+.
    def enter_phase(key)
      @parser.phase = @parser.phases[key]
    end

    # Make all the special element rearranging voodoo kick in: yields the
    # "in body" phase while @tree.insert_from_table is true and resets the
    # flag afterwards. The flag is not reset if the block raises.
    def with_table_voodoo
      @tree.insert_from_table = true
      yield @parser.phases[:inBody]
      @tree.insert_from_table = false
    end

  end
end
|
133
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/initial_phase.rb
vendored
Normal file
133
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/initial_phase.rb
vendored
Normal file
|
@ -0,0 +1,133 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # First tree-construction phase: waits for the DOCTYPE.
  #
  # This phase deals with error handling as well which is currently not
  # covered in the specification. The error handling is typically known as
  # "quirks mode". It is expected that a future version of HTML5 will define this.
  #
  # Every token other than a comment, whitespace or a DOCTYPE is reported as
  # a missing doctype and reprocessed by the :rootElement phase.
  class InitialPhase < Phase

    # Public identifiers that select quirks mode. All entries are lowercase
    # because the identifier from the document is lowercased before lookup.
    QUIRKS_PUBLIC_IDS = [
      "+//silmaril//dtd html pro v0r11 19970101//en",
      "-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
      "-//as//dtd html 3.0 aswedit + extensions//en",
      "-//ietf//dtd html 2.0 level 1//en",
      "-//ietf//dtd html 2.0 level 2//en",
      "-//ietf//dtd html 2.0 strict level 1//en",
      "-//ietf//dtd html 2.0 strict level 2//en",
      "-//ietf//dtd html 2.0 strict//en",
      "-//ietf//dtd html 2.0//en",
      "-//ietf//dtd html 2.1e//en",
      "-//ietf//dtd html 3.0//en",
      "-//ietf//dtd html 3.0//en//",
      "-//ietf//dtd html 3.2 final//en",
      "-//ietf//dtd html 3.2//en",
      "-//ietf//dtd html 3//en",
      "-//ietf//dtd html level 0//en",
      "-//ietf//dtd html level 0//en//2.0",
      "-//ietf//dtd html level 1//en",
      "-//ietf//dtd html level 1//en//2.0",
      "-//ietf//dtd html level 2//en",
      "-//ietf//dtd html level 2//en//2.0",
      "-//ietf//dtd html level 3//en",
      "-//ietf//dtd html level 3//en//3.0",
      "-//ietf//dtd html strict level 0//en",
      "-//ietf//dtd html strict level 0//en//2.0",
      "-//ietf//dtd html strict level 1//en",
      "-//ietf//dtd html strict level 1//en//2.0",
      "-//ietf//dtd html strict level 2//en",
      "-//ietf//dtd html strict level 2//en//2.0",
      "-//ietf//dtd html strict level 3//en",
      "-//ietf//dtd html strict level 3//en//3.0",
      "-//ietf//dtd html strict//en",
      "-//ietf//dtd html strict//en//2.0",
      "-//ietf//dtd html strict//en//3.0",
      "-//ietf//dtd html//en",
      "-//ietf//dtd html//en//2.0",
      "-//ietf//dtd html//en//3.0",
      "-//metrius//dtd metrius presentational//en",
      "-//microsoft//dtd internet explorer 2.0 html strict//en",
      "-//microsoft//dtd internet explorer 2.0 html//en",
      "-//microsoft//dtd internet explorer 2.0 tables//en",
      "-//microsoft//dtd internet explorer 3.0 html strict//en",
      "-//microsoft//dtd internet explorer 3.0 html//en",
      "-//microsoft//dtd internet explorer 3.0 tables//en",
      "-//netscape comm. corp.//dtd html//en",
      "-//netscape comm. corp.//dtd strict html//en",
      "-//o'reilly and associates//dtd html 2.0//en",
      "-//o'reilly and associates//dtd html extended 1.0//en",
      "-//spyglass//dtd html 2.0 extended//en",
      "-//sq//dtd html 2.0 hotmetal + extensions//en",
      "-//sun microsystems corp.//dtd hotjava html//en",
      "-//sun microsystems corp.//dtd hotjava strict html//en",
      "-//w3c//dtd html 3 1995-03-24//en",
      "-//w3c//dtd html 3.2 draft//en",
      "-//w3c//dtd html 3.2 final//en",
      "-//w3c//dtd html 3.2//en",
      "-//w3c//dtd html 3.2s draft//en",
      "-//w3c//dtd html 4.0 frameset//en",
      "-//w3c//dtd html 4.0 transitional//en",
      "-//w3c//dtd html experimental 19960712//en",
      "-//w3c//dtd html experimental 970421//en",
      "-//w3c//dtd w3 html//en",
      "-//w3o//dtd w3 html 3.0//en",
      "-//w3o//dtd w3 html 3.0//en//",
      "-//w3o//dtd w3 html strict 3.0//en//",
      "-//webtechs//dtd mozilla html 2.0//en",
      "-//webtechs//dtd mozilla html//en",
      "-/w3c/dtd html 4.0 transitional/en",
      "html"
    ].freeze

    # Public identifiers that select quirks mode only when the DOCTYPE has
    # no system identifier.
    QUIRKS_PUBLIC_IDS_WITHOUT_SYSTEM_ID = [
      "-//w3c//dtd html 4.01 frameset//en",
      "-//w3c//dtd html 4.01 transitional//en"
    ].freeze

    # System identifier that selects quirks mode on its own.
    QUIRKS_SYSTEM_ID = "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"

    # EOF before any DOCTYPE: report it and let the :rootElement phase
    # handle EOF.
    def process_eof
      parse_error("expected-doctype-but-got-eof")
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase.process_eof
    end

    # Comments seen before the DOCTYPE are attached to the document itself.
    def processComment(data)
      @tree.insert_comment(data, @tree.document)
    end

    # Inserts the DOCTYPE and moves on to the :rootElement phase.
    #
    # Anything other than a plain "html" doctype without public or system
    # identifier is reported as "unknown-doctype". The quirks-mode branches
    # are still placeholders, so the classification below has no effect yet;
    # +correct+ is accepted but not used.
    def processDoctype(name, publicId, systemId, correct)
      html_doctype = (name.downcase == 'html')
      if !html_doctype || publicId || systemId
        parse_error("unknown-doctype")
      end
      # XXX need to update DOCTYPE tokens
      @tree.insertDoctype(name, publicId, systemId)

      if !html_doctype
        # XXX quirks mode
      elsif quirks_doctype?(publicId, systemId)
        #XXX quirks mode
      end

      @parser.phase = @parser.phases[:rootElement]
    end

    # Whitespace before the DOCTYPE is dropped.
    def processSpaceCharacters(data)
    end

    def processCharacters(data)
      parse_error("expected-doctype-but-got-chars")
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase.processCharacters(data)
    end

    def processStartTag(name, attributes)
      parse_error("expected-doctype-but-got-start-tag", {"name" => name})
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase.processStartTag(name, attributes)
    end

    def processEndTag(name)
      parse_error("expected-doctype-but-got-end-tag", {"name" => name})
      @parser.phase = @parser.phases[:rootElement]
      @parser.phase.processEndTag(name)
    end

    protected

    # True when the identifiers of an "html" DOCTYPE match one of the
    # quirks-mode tables. The public identifier is compared lowercased (a
    # missing one counts as ""); the system identifier is compared as given.
    def quirks_doctype?(publicId, systemId)
      normalized_public_id = publicId.to_s.downcase
      QUIRKS_PUBLIC_IDS.include?(normalized_public_id) ||
        (systemId.nil? &&
          QUIRKS_PUBLIC_IDS_WITHOUT_SYSTEM_ID.include?(normalized_public_id)) ||
        systemId == QUIRKS_SYSTEM_ID
    end

  end
end
|
154
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/phase.rb
vendored
Normal file
154
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/phase.rb
vendored
Normal file
|
@ -0,0 +1,154 @@
|
|||
module HTML5
  # Base class for helper objects that implement each phase of processing.
  #
  # Handler methods should be in the following order (they can be omitted):
  #
  #   * EOF
  #   * Comment
  #   * Doctype
  #   * SpaceCharacters
  #   * Characters
  #   * StartTag
  #     - startTag* methods
  #   * EndTag
  #     - endTag* methods
  #
  # Subclasses declare which tags they handle with handle_start/handle_end;
  # processStartTag/processEndTag then dispatch by tag name, falling back to
  # startTagOther/endTagOther for undeclared tags.
  class Phase

    extend Forwardable
    def_delegators :@parser, :parse_error

    # The following example call:
    #
    #   tag_handlers('startTag', 'html', %w( base link meta ), %w( li dt dd ) => 'ListItem')
    #
    # ...would return a hash equal to this:
    #
    #   { 'html' => 'startTagHtml',
    #     'base' => 'startTagBaseLinkMeta',
    #     'link' => 'startTagBaseLinkMeta',
    #     'meta' => 'startTagBaseLinkMeta',
    #     'li'   => 'startTagListItem',
    #     'dt'   => 'startTagListItem',
    #     'dd'   => 'startTagListItem' }
    #
    # A trailing Hash maps tag names to an explicit method-name suffix; every
    # other argument (a name or an array of names) gets a suffix built from
    # the capitalized names. Plain arguments are applied last, so they win
    # when a tag name appears both ways.
    def self.tag_handlers(prefix, *tags)
      mapping = {}
      explicit_suffixes = tags.last.is_a?(Hash) ? tags.pop : {}

      explicit_suffixes.each do |names, handler_method_suffix|
        handler_method = prefix + handler_method_suffix
        Array(names).each {|name| mapping[name] = handler_method }
      end

      tags.each do |names|
        names = Array(names)
        handler_method = prefix + names.map {|name| name.capitalize }.join
        names.each {|name| mapping[name] = handler_method }
      end

      mapping
    end

    # Per-class table of start tag name => handler method name; unknown tags
    # resolve to 'startTagOther'.
    def self.start_tag_handlers
      @start_tag_handlers ||= Hash.new('startTagOther')
    end

    # Declare what start tags this Phase handles. Can be called more than once.
    #
    # Example usage:
    #
    #   handle_start 'html'
    #   # html start tags will be handled by a method named 'startTagHtml'
    #
    #   handle_start %w( base link meta )
    #   # base, link and meta start tags will be handled by a method named 'startTagBaseLinkMeta'
    #
    #   handle_start %w( li dt dd ) => 'ListItem'
    #   # li, dt, and dd start tags will be handled by a method named 'startTagListItem'
    #
    def self.handle_start(*tags)
      start_tag_handlers.update tag_handlers('startTag', *tags)
    end

    # Per-class table of end tag name => handler method name; unknown tags
    # resolve to 'endTagOther'.
    def self.end_tag_handlers
      @end_tag_handlers ||= Hash.new('endTagOther')
    end

    # Declare what end tags this Phase handles. Behaves like handle_start.
    #
    def self.handle_end(*tags)
      end_tag_handlers.update tag_handlers('endTag', *tags)
    end

    def initialize(parser, tree)
      @parser, @tree = parser, tree
    end

    # Default EOF handling: generate implied end tags, then report an error
    # if elements other than <html> and <body> are still open.
    def process_eof
      @tree.generateImpliedEndTags

      open_count = @tree.open_elements.length
      if open_count > 2
        parse_error("expected-closing-tag-but-got-eof")
      elsif open_count == 2 and @tree.open_elements[1].name != 'body'
        # This happens for framesets or something?
        parse_error("expected-closing-tag-but-got-eof")
      elsif @parser.inner_html and open_count > 1
        # XXX This is not what the specification says. Not sure what to do here.
        parse_error("eof-in-innerhtml")
      end
      # Betting ends.
    end

    def processComment(data)
      # For most phases the following is correct. Where it's not it will be
      # overridden.
      @tree.insert_comment(data, @tree.open_elements.last)
    end

    # A DOCTYPE anywhere but the initial phase is only reported.
    def processDoctype(name, publicId, systemId, correct)
      parse_error("unexpected-doctype")
    end

    def processSpaceCharacters(data)
      @tree.insertText(data)
    end

    # Dispatches to the handler registered for +name+ via handle_start.
    def processStartTag(name, attributes)
      send self.class.start_tag_handlers[name], name, attributes
    end

    # Merges the attributes of a repeated <html> start tag into the root
    # element without overwriting attributes it already has.
    def startTagHtml(name, attributes)
      if @parser.first_start_tag == false and name == 'html'
        parse_error("non-html-root")
      end
      # XXX Need a check here to see if the first start tag token emitted is
      # this token... If it's not, invoke parse_error.
      root_attributes = @tree.open_elements.first.attributes
      attributes.each do |attr, value|
        root_attributes[attr] = value unless root_attributes.has_key?(attr)
      end
      @parser.first_start_tag = false
    end

    # Dispatches to the handler registered for +name+ via handle_end.
    def processEndTag(name)
      send self.class.end_tag_handlers[name], name
    end

    # Raises AssertionError unless +value+ is truthy.
    #
    # This must be +raise+: +throw+ is the non-local exit paired with
    # +catch+, so throwing an exception object does not raise it and fails
    # with an "uncaught throw" error instead.
    def assert(value)
      raise AssertionError unless value
    end

    # Shorthand for @tree.elementInScope; arguments are passed through.
    def in_scope?(*args)
      @tree.elementInScope(*args)
    end

    # Pops open elements until one named +name+ has been popped or, when no
    # name is given, until the block returns true for the popped element.
    # Stops early when the stack runs empty. Returns the last popped element
    # (nil if nothing was popped).
    def remove_open_elements_until(name=nil)
      element = nil
      until @tree.open_elements.empty?
        element = @tree.open_elements.pop
        break if name.nil? ? yield(element) : element.name == name
      end
      element
    end
  end
end
|
41
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/root_element_phase.rb
vendored
Normal file
41
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/root_element_phase.rb
vendored
Normal file
|
@ -0,0 +1,41 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Phase that creates the root <html> element.
  #
  # Every token except comments and whitespace makes this phase insert an
  # <html> element, switch the parser to :beforeHead and hand the same token
  # to the phase that is current afterwards.
  class RootElementPhase < Phase

    def process_eof
      phase_after_root.process_eof
    end

    # Comments before the root element belong to the document node.
    def processComment(data)
      @tree.insert_comment(data, @tree.document)
    end

    # Whitespace before the root element is dropped.
    def processSpaceCharacters(data)
    end

    def processCharacters(data)
      phase_after_root.processCharacters(data)
    end

    def processStartTag(name, attributes)
      @parser.first_start_tag = true if name == 'html'
      phase_after_root.processStartTag(name, attributes)
    end

    def processEndTag(name)
      phase_after_root.processEndTag(name)
    end

    # Creates an attribute-less <html> element, pushes it on the stack of
    # open elements, appends it to the document and moves to :beforeHead.
    def insert_html_element
      root = @tree.createElement('html', {})
      @tree.open_elements.push(root)
      @tree.document.appendChild(root)
      @parser.phase = @parser.phases[:beforeHead]
    end

    protected

    # Inserts the root element and returns the parser's current phase, which
    # is the one that has to reprocess the pending token.
    def phase_after_root
      insert_html_element
      @parser.phase
    end
  end
end
|
35
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/trailing_end_phase.rb
vendored
Normal file
35
attic/vendor/plugins/HTML5lib/lib/html5/html5parser/trailing_end_phase.rb
vendored
Normal file
|
@ -0,0 +1,35 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Phase entered once the document is expected to be over.
  #
  # Comments go to the document node and whitespace is passed to the
  # previous phase; any other token is an error that switches the parser
  # back to @parser.last_phase, where the token is reprocessed.
  class TrailingEndPhase < Phase

    # EOF is exactly what this phase expects, so there is nothing to do.
    def process_eof
    end

    def processComment(data)
      @tree.insert_comment(data, @tree.document)
    end

    # Whitespace goes to the previous phase without switching back to it.
    def processSpaceCharacters(data)
      previous = @parser.last_phase
      previous.processSpaceCharacters(data)
    end

    def processCharacters(data)
      parse_error("expected-eof-but-got-char")
      resume_last_phase.processCharacters(data)
    end

    def processStartTag(name, attributes)
      parse_error("expected-eof-but-got-start-tag", {"name" => name})
      resume_last_phase.processStartTag(name, attributes)
    end

    def processEndTag(name)
      parse_error("expected-eof-but-got-end-tag", {"name" => name})
      resume_last_phase.processEndTag(name)
    end

    protected

    # Makes the previous phase current again and returns the parser's
    # current phase.
    def resume_last_phase
      @parser.phase = @parser.last_phase
      @parser.phase
    end
  end
end
|
701
attic/vendor/plugins/HTML5lib/lib/html5/inputstream.rb
vendored
Executable file
701
attic/vendor/plugins/HTML5lib/lib/html5/inputstream.rb
vendored
Executable file
|
@ -0,0 +1,701 @@
|
|||
require 'stringio'
|
||||
require 'html5/constants'
|
||||
|
||||
module HTML5
|
||||
|
||||
# Provides a unicode stream of characters to the HTMLTokenizer.
|
||||
|
||||
# This class takes care of character encoding and removing or replacing
|
||||
# incorrect byte-sequences and also provides column and line tracking.
|
||||
|
||||
class HTMLInputStream
|
||||
|
||||
attr_accessor :queue, :char_encoding, :errors
|
||||
|
||||
# Initialises the HTMLInputStream.
|
||||
#
|
||||
# HTMLInputStream(source, [encoding]) -> Normalized stream from source
|
||||
# for use by the HTML5Lib.
|
||||
#
|
||||
# source can be either an IO-like object (anything that responds to #read)
# or a string of markup; a string is never treated as a filename.
|
||||
#
|
||||
# The optional encoding parameter must be a string that indicates
|
||||
# the encoding. If specified, that encoding will be used,
|
||||
# regardless of any BOM or later declaration (such as in a meta
|
||||
# element)
|
||||
#
|
||||
# :parse_meta option - look for a <meta> element containing encoding
# information (defaults to true)
|
||||
|
||||
# Sets up the stream: applies the options, opens +source+, settles on a
# character encoding and reads the first chunk of input.
#
# Recognised option keys are those that match an instance variable used
# here (:encoding, :parse_meta, :chardet); every key in +options+ is stored
# as the instance variable of the same name.
def initialize(source, options = {})
  @encoding = nil
  @parse_meta = true
  @chardet = true

  options.each {|name, value| instance_variable_set("@#{name}", value) }

  # partial Ruby 1.9 support
  if @encoding and source.respond_to? :force_encoding
    begin
      source.force_encoding(@encoding)
    rescue StandardError
      # Best effort only: the source keeps the encoding it already had.
    end
  end

  # Raw Stream
  @raw_stream = open_stream(source)

  # Encoding Information
  #Number of bytes to use when looking for a meta element with
  #encoding information
  @NUM_BYTES_META = 512
  #Number of bytes to use when using detecting encoding using chardet
  @NUM_BYTES_CHARDET = 256
  #Number of bytes to use when reading content
  @NUM_BYTES_BUFFER = 1024

  #Encoding to use if no other information can be found
  @DEFAULT_ENCODING = 'windows-1252'

  #Detect encoding iff no explicit "transport level" encoding is supplied
  if @encoding.nil? or not HTML5.is_valid_encoding(@encoding)
    @char_encoding = detect_encoding
  else
    @char_encoding = @encoding
  end

  # Read bytes from stream decoding them into Unicode
  @buffer = @raw_stream.read(@NUM_BYTES_BUFFER) || ''
  if @char_encoding == 'windows-1252'
    @win1252 = true
  elsif @char_encoding != 'utf-8'
    begin
      # The require sits inside the begin block, and LoadError is rescued
      # explicitly, because LoadError is not a StandardError: without this a
      # Ruby that has no iconv would abort here instead of taking the same
      # windows-1252 fallback as a failed conversion.
      require 'iconv'
      @buffer << @raw_stream.read unless @raw_stream.eof?
      @buffer = Iconv.iconv('utf-8', @char_encoding, @buffer).first
    rescue StandardError, LoadError
      @win1252 = true
    end
  end

  @queue = []
  @errors = []

  # Reset position in the list to read from
  @tell = 0
  @line = @col = 0
  @line_lengths = []
end
|
||||
|
||||
# Produces a file object from source.
|
||||
#
|
||||
# source can be either a file object, local filename or a string.
|
||||
# Returns something readable for +source+: the object itself when it
# already responds to +read+, otherwise a StringIO wrapped around it.
def open_stream(source)
  return source if source.respond_to?(:read)

  # Not IO-like: assume it is a string
  StringIO.new(source)
end
|
||||
|
||||
# Works out the encoding of the raw stream. Sources are tried in order:
# a byte order mark, a <meta> declaration (when @parse_meta is set), the
# chardet gem (when @chardet is set and the gem can be loaded), and
# finally @DEFAULT_ENCODING. Returns the encoding name.
def detect_encoding
  # A BOM wins outright; detect_bom also reads past it when present
  found = detect_bom

  # No BOM: look for meta elements carrying encoding information
  found = detect_encoding_meta if found.nil? and @parse_meta

  # Guess with chardet, if available
  if found.nil? and @chardet
    begin
      require 'rubygems'
      require 'UniversalDetector' # gem install chardet
      chunks = []
      detector = UniversalDetector::Detector.instance
      detector.reset
      until @raw_stream.eof?
        chunk = @raw_stream.read(@NUM_BYTES_CHARDET)
        break if !chunk or chunk.empty?
        chunks << chunk
        detector.feed(chunk)
        break if detector.instance_eval {@done}
        detector.instance_eval {
          @_mLastChar = @_mLastChar.chr if Fixnum === @_mLastChar
        }
      end
      detector.close
      found = detector.result['encoding']
      # Hand everything that was consumed back to the stream
      seek(chunks.join(''), 0)
    rescue LoadError
    end
  end

  # If all else fails use the default encoding
  found = @DEFAULT_ENCODING if found.nil?

  # Substitute for equivalent encoding
  found = 'windows-1252' if 'iso-8859-1' == found.downcase

  found
end
|
||||
|
||||
# Attempts to detect a BOM at the start of the stream. If
|
||||
# an encoding can be determined from the BOM return the name of the
|
||||
# encoding otherwise return nil
|
||||
# Reads up to four bytes from the raw stream and checks them against the
# known byte order marks. When one matches, the stream is left positioned
# just after it and the encoding name is returned; otherwise the bytes are
# handed back to the stream and nil is returned. Also returns nil when the
# stream yields nothing.
def detect_bom
  # Compare raw bytes. Without this, a literal written in the source
  # encoding and the binary string returned by read never compare equal on
  # Rubies that tag strings with an encoding.
  as_bytes = lambda do |s|
    s.respond_to?(:force_encoding) ? s.dup.force_encoding('BINARY') : s
  end

  # Order matters: UTF-8 first, then UTF-32 before UTF-16, because the
  # UTF-32LE mark begins with the UTF-16LE mark.
  boms = [
    ["\xef\xbb\xbf",     'utf-8'],
    ["\xff\xfe\x00\x00", 'utf-32le'],
    ["\x00\x00\xfe\xff", 'utf-32be'],
    ["\xff\xfe",         'utf-16le'],
    ["\xfe\xff",         'utf-16be']
  ].map {|mark, name| [as_bytes.call(mark), name] }

  # Read in 4 bytes from the current position of the stream
  string = @raw_stream.read(4)
  return nil unless string

  bytes = as_bytes.call(string)

  # A mark only matches when the stream really supplied all of its bytes,
  # so the skip length is always the length of the mark itself.
  bom, encoding = boms.find {|mark, _name| bytes[0, mark.length] == mark }

  # Set the read position past the BOM if one was found, otherwise
  # set it to the start of the stream
  seek(bytes, bom ? bom.length : 0)

  return encoding
end
|
||||
|
||||
# Repositions the raw stream after a look-ahead read.
#
# buffer - the data that was read while looking ahead
# n      - offset into +buffer+ from which reading should resume
#
# Three strategies are tried in turn: push the unread tail back with
# +unget+ when the stream supports it, +seek+ the stream to +n+ when it is
# seekable, and otherwise wrap the stream in a delegator that keeps the
# pushed-back data and serves it before touching the real stream again.
def seek(buffer, n)
  if @raw_stream.respond_to?(:unget)
    @raw_stream.unget(buffer[n..-1])
    return
  end

  if @raw_stream.respond_to?(:seek)
    begin
      @raw_stream.seek(n)
      return
    rescue Errno::ESPIPE
      # Stream is not seekable after all; fall through to the wrapper.
    end
  end

  # Loaded lazily: only needed when the stream is not seekable.
  require 'delegate'
  @raw_stream = SimpleDelegator.new(@raw_stream)

  class << @raw_stream
    # Serves pushed-back data first, then the underlying stream.
    # chars == -1 means "everything".
    def read(chars=-1)
      @data ||= ''
      if chars == -1 or chars > @data.length
        result = @data
        @data = ''
        return result if __getobj__.eof?
        return result + __getobj__.read.to_s if chars == -1
        return result + __getobj__.read(chars-result.length).to_s
      elsif @data.empty?
        return __getobj__.read(chars)
      else
        # Take the first +chars+ bytes, starting at index 0.
        result = @data[0...chars]
        @data = @data[chars..-1]
        return result
      end
    end

    # Pushes +data+ back so that it is the next thing read.
    def unget(data)
      return if data.nil? or data.empty?
      if !@data or @data.empty?
        @data = data
      else
        # Pushed-back data goes in front of what is still pending.
        @data = data + @data
      end
    end
  end

  @raw_stream.unget(buffer[n .. -1])
end
|
||||
|
||||
# Report the encoding declared by the meta element
|
||||
# Reads the first @NUM_BYTES_META bytes of the raw stream, lets
# EncodingParser look for an encoding declaration in them, then hands the
# bytes back to the stream. Returns whatever the parser reports.
def detect_encoding_meta
  # read returns nil on an exhausted stream; keep a String so the bytes can
  # always be handed back to seek.
  buffer = @raw_stream.read(@NUM_BYTES_META) || ''
  parser = EncodingParser.new(buffer)
  seek(buffer, 0)
  return parser.get_encoding
end
|
||||
|
||||
# Returns (line, col) of the current position in the stream.
|
||||
# Returns [line, col] for the current position in the stream. Characters
# still waiting in the unget queue have not been consumed yet, so they are
# walked backwards and subtracted from the recorded position.
def position
  row, column = @line, @col
  if @queue and @queue.last != :EOF
    @queue.reverse_each do |queued|
      unless queued == "\n"
        column -= 1
        next
      end
      # Stepping back over a newline: continue at the end of the previous line
      row -= 1
      raise RuntimeError.new("col=#{column}") unless column == 0
      column = @line_lengths[row]
    end
  end
  return [row + 1, column]
end
|
||||
|
||||
# Read one character from the stream or queue if available. Return
|
||||
# EOF when EOF is reached.
|
||||
# Reads one character from the unget queue or, when that is empty, from the
# buffer (refilling it from the raw stream as needed). Returns :EOF once the
# buffer is exhausted.
#
# Indexing the buffer gives a String on Ruby 1.9+ and an Integer byte on
# Ruby 1.8, so both cases are handled below. In both, NUL becomes U+FFFD
# (and is recorded in @errors) and CR / CRLF become a single LF.
def char
  return @queue.shift unless @queue.empty?

  if @tell + 3 > @buffer.length && !@raw_stream.eof?
    # read next block
    @buffer = @buffer[@tell..-1] + @raw_stream.read(@NUM_BYTES_BUFFER)
    @tell = 0
  end

  c = @buffer[@tell]
  @tell += 1

  case c

  when String
    # partial Ruby 1.9 support
    case c
    when "\0"
      @errors.push("null-character")
      c = "\uFFFD" # null characters are invalid
    when "\r"
      # normalize newlines; nothing further is needed below, since c can
      # no longer be a carriage return after this branch
      @tell += 1 if @buffer[@tell] == "\n"
      c = "\n"
    when "\x80" .. "\x9F"
      c = ENTITIES_WINDOWS1252[c.ord-0x80].chr('utf-8')
    when "\xA0" .. "\xFF"
      if c.encoding == Encoding::ASCII_8BIT
        c = c.encode('utf-8','iso-8859-1')
      end
    end

    # update position in stream
    if c == "\x0a"
      @line_lengths << @col
      @line += 1
      @col = 0
    else
      @col += 1
    end

    c

  when 0x01..0x7F
    if c == 0x0D
      # normalize newlines
      @tell += 1 if @buffer[@tell] == 0x0A
      c = 0x0A
    end

    # update position in stream
    if c == 0x0a
      @line_lengths << @col
      @line += 1
      @col = 0
    else
      @col += 1
    end

    c.chr

  when 0x80..0xBF
    if !@win1252
      [0xFFFD].pack('U') # invalid utf-8
    elsif c <= 0x9f
      [ENTITIES_WINDOWS1252[c-0x80]].pack('U')
    else
      "\xC2" + c.chr # convert to utf-8
    end

  when 0xC0..0xFF
    if instance_variable_defined?("@win1252") && @win1252
      "\xC3" + (c - 64).chr # convert to utf-8
    # from http://www.w3.org/International/questions/qa-forms-utf-8.en.php
    # The n flag makes the pattern match raw bytes, which is what the byte
    # ranges below describe.
    elsif @buffer[@tell - 1..@tell + 3] =~ /^
          ( [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
          |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding overlongs
          | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
          |  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates
          |  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3
          | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
          |  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16
          )/xn
      @tell += $1.length - 1
      $1
    else
      [0xFFFD].pack('U') # invalid utf-8
    end

  when 0x00
    @errors.push("null-character")
    [0xFFFD].pack('U') # null characters are invalid

  else
    :EOF
  end
end
|
||||
|
||||
# Returns a string of characters from the stream up to but not
|
||||
# including any character in characters or EOF. characters can be
|
||||
# any container that supports the in method being called on it.
|
||||
# Collects characters from the stream until one is found that belongs to
# +characters+ (or, with +opposite+ set, one that does not), or until EOF.
# The character that stopped the scan goes back to the front of the queue.
# Returns the collected characters as one string.
def chars_until(characters, opposite=false)
  collected = []
  current = char

  until current == :EOF or characters.include?(current) != opposite
    collected << current
    current = char
  end

  # Return the stopping character to where it came from
  @queue.unshift(current) unless current == :EOF
  return collected.join('')
end
|
||||
|
||||
# Puts +characters+ back at the front of the queue so that they are read
# again next. :EOF is ignored. Objects responding to +to_a+ are pushed
# back element by element; anything else is pushed back character by
# character.
def unget(characters)
  return if characters == :EOF

  pieces = if characters.respond_to?(:to_a)
    characters.to_a
  else
    characters.each_char.to_a
  end
  @queue.unshift(*pieces)
end
|
||||
end
|
||||
|
||||
# String-like object with an associated position and various extra methods
|
||||
# If the position is ever greater than the string length then an exception is raised
|
||||
class EncodingBytes < String

  # Index of the byte currently being looked at; starts before the first one.
  attr_accessor :position

  def initialize(value)
    super(value)
    @position = -1
  end

  # Advances the position one step at a time and yields the byte found
  # there. An EOF raised by the block simply ends the iteration.
  def each
    until @position >= length
      @position += 1
      yield self[@position]
    end
  rescue EOF
  end

  # Returns the byte at the current position as a one-character string.
  # Raises EOF when the position is at or past the end.
  def current_byte
    raise EOF unless @position < length
    self[@position].chr
  end

  # Moves the position forward past every byte contained in +chars+.
  def skip(chars=SPACE_CHARACTERS)
    @position += 1 while chars.include?(current_byte)
  end

  # Checks whether +bytes+ occurs at the current position (compared
  # case-insensitively when +lower+ is set). On a match the position moves
  # to the byte after it and true is returned; otherwise the position is
  # left alone and false is returned.
  def match_bytes(bytes, lower=false)
    window = self[position ... position+bytes.length]
    window.downcase! if lower
    return false unless window == bytes
    @position += bytes.length
    true
  end

  # Finds the next occurrence of +bytes+ at or after the current position
  # and moves to its last byte. Raises EOF when there is none.
  def jump_to(bytes)
    offset = self[position .. -1].index(bytes)
    raise EOF unless offset
    @position += (offset + bytes.length-1)
    true
  end

  # Moves the position forward to the next byte contained in +byte_list+.
  def find_next(byte_list)
    @position += 1 until byte_list.include?(current_byte)
  end
end
|
||||
|
||||
# Mini parser for detecting character encoding from meta elements
|
||||
class EncodingParser

  # data - the bytes to search for an encoding declaration
  def initialize(data)
    @data = EncodingBytes.new(data.to_s)
    @encoding = nil
  end

  # Byte sequences that start something of interest, with the method that
  # handles each. Checked in order, so longer prefixes come first.
  @@method_dispatch = [
    ['<!--', :handle_comment],
    ['<meta', :handle_meta],
    ['</', :handle_possible_end_tag],
    ['<!', :handle_other],
    ['<?', :handle_other],
    ['<', :handle_possible_start_tag]
  ]

  # Scans the data and returns the declared encoding, or nil when none is
  # found. A declared UTF-16 or UTF-32 variant is reported as 'utf-8'.
  def get_encoding
    @data.each do |byte|
      keep_parsing = true
      @@method_dispatch.each do |(key, method)|
        if @data.match_bytes(key, true)
          keep_parsing = send(method)
          break
        end
      end
      break unless keep_parsing
    end
    unless @encoding.nil?
      @encoding = @encoding.strip
      if ["UTF-16", "UTF-16BE", "UTF-16LE", "UTF-32", "UTF-32BE", "UTF-32LE"].include?(@encoding.upcase)
        @encoding = 'utf-8'
      end
    end

    return @encoding
  end

  # Skip over comments
  def handle_comment
    return @data.jump_to('-->')
  end

  # Examines the attributes of a <meta> element. Returns false (stop
  # parsing) once a valid encoding has been stored in @encoding, true
  # otherwise.
  def handle_meta
    # if we have <meta not followed by a space so just keep going
    return true unless SPACE_CHARACTERS.include?(@data.current_byte)

    #We have a valid meta element we want to search for attributes
    while true
      #Try to find the next attribute after the current position
      attr = get_attribute

      return true if attr.nil?

      if attr[0] == 'charset'
        tentative_encoding = attr[1]
        if HTML5.is_valid_encoding(tentative_encoding)
          @encoding = tentative_encoding
          return false
        end
      elsif attr[0] == 'content'
        content_parser = ContentAttrParser.new(EncodingBytes.new(attr[1]))
        tentative_encoding = content_parser.parse
        if HTML5.is_valid_encoding(tentative_encoding)
          @encoding = tentative_encoding
          return false
        end
      end
    end
  end

  def handle_possible_start_tag
    return handle_possible_tag(false)
  end

  def handle_possible_end_tag
    @data.position += 1
    return handle_possible_tag(true)
  end

  # Skips over a start or end tag, including its attributes. Always
  # returns true (keep parsing).
  def handle_possible_tag(end_tag)
    unless ASCII_LETTERS.include?(@data.current_byte)
      #If the next byte is not an ascii letter either ignore this
      #fragment (possible start tag case) or treat it according to
      #handle_other
      if end_tag
        @data.position -= 1
        handle_other
      end
      return true
    end

    @data.find_next(SPACE_CHARACTERS + ['<', '>'])

    if @data.current_byte == '<'
      #return to the first step in the overall "two step" algorithm
      #reprocessing the < byte
      @data.position -= 1
    else
      #Read all attributes
      {} until get_attribute.nil?
    end
    return true
  end

  def handle_other
    return @data.jump_to('>')
  end

  # Return a name,value pair for the next attribute in the stream,
  # if one is found, or nil
  def get_attribute
    @data.skip(SPACE_CHARACTERS + ['/'])

    if @data.current_byte == '<'
      @data.position -= 1
      return nil
    elsif @data.current_byte == '>'
      return nil
    end

    attr_name = []
    attr_value = []
    space_found = false
    #Step 5 attribute name
    while true
      # "=" only ends the name once the name has at least one character;
      # an empty Array is truthy in Ruby, so emptiness is tested explicitly.
      if @data.current_byte == '=' && !attr_name.empty?
        break
      elsif SPACE_CHARACTERS.include?(@data.current_byte)
        space_found = true
        break
      elsif ['/', '<', '>'].include?(@data.current_byte)
        return [attr_name.join(''), '']
      elsif ASCII_UPPERCASE.include?(@data.current_byte)
        attr_name.push(@data.current_byte.downcase)
      else
        attr_name.push(@data.current_byte)
      end
      #Step 6
      @data.position += 1
    end
    #Step 7
    if space_found
      @data.skip
      #Step 8
      unless @data.current_byte == '='
        @data.position -= 1
        return [attr_name.join(''), '']
      end
    end
    #XXX need to advance position in both spaces and value case
    #Step 9
    @data.position += 1
    #Step 10
    @data.skip
    #Step 11
    if ["'", '"'].include?(@data.current_byte)
      #11.1
      quote_char = @data.current_byte
      while true
        @data.position+=1
        #11.3
        if @data.current_byte == quote_char
          @data.position += 1
          return [attr_name.join(''), attr_value.join('')]
        #11.4
        elsif ASCII_UPPERCASE.include?(@data.current_byte)
          attr_value.push(@data.current_byte.downcase)
        #11.5
        else
          attr_value.push(@data.current_byte)
        end
      end
    elsif ['>', '<'].include?(@data.current_byte)
      return [attr_name.join(''), '']
    elsif ASCII_UPPERCASE.include?(@data.current_byte)
      attr_value.push(@data.current_byte.downcase)
    else
      attr_value.push(@data.current_byte)
    end
    # Unquoted value: runs until whitespace or the end of the tag
    while true
      @data.position += 1
      if (SPACE_CHARACTERS + ['>', '<']).include?(@data.current_byte)
        return [attr_name.join(''), attr_value.join('')]
      elsif ASCII_UPPERCASE.include?(@data.current_byte)
        attr_value.push(@data.current_byte.downcase)
      else
        attr_value.push(@data.current_byte)
      end
    end
  end
end
|
||||
|
||||
class ContentAttrParser
  # data - an object exposing position, jump_to, skip, find_next,
  #        current_byte and slicing (EncodingBytes elsewhere in this file)
  def initialize(data)
    @data = data
  end

  # Pulls the charset value out of the text held in @data, as in
  # "text/html; charset=utf-8". The value may be quoted with ' or ".
  # Returns the value, or nil when the text runs out first or "charset"
  # is not followed by "=".
  def parse
    # Move to just after the first ";"
    @data.position = 0
    @data.jump_to(';')
    @data.position += 1
    @data.skip

    # The parameter has to be charset
    @data.jump_to('charset')
    @data.position += 1
    @data.skip
    return nil unless @data.current_byte == '='

    @data.position += 1
    @data.skip

    opener = @data.current_byte
    if opener == '"' or opener == "'"
      # Quoted: everything up to the matching quote mark
      @data.position += 1
      from = @data.position
      @data.jump_to(opener)
      @data[from ... @data.position]
    else
      # Unquoted: up to the next space, or all that remains
      from = @data.position
      begin
        @data.find_next(SPACE_CHARACTERS)
        @data[from ... @data.position]
      rescue EOF
        @data[from .. -1]
      end
    end
  rescue EOF
    nil
  end
end
|
||||
|
||||
# Determine if a string is a supported encoding
|
||||
# Tells whether +encoding+ is a String naming one of the ENCODINGS; the
# comparison ignores case and surrounding whitespace.
def self.is_valid_encoding(encoding)
  return false if encoding.nil?
  return false unless encoding.kind_of?(String)
  ENCODINGS.include?(encoding.downcase.strip)
end
|
||||
|
||||
end
|
158
attic/vendor/plugins/HTML5lib/lib/html5/liberalxmlparser.rb
vendored
Executable file
158
attic/vendor/plugins/HTML5lib/lib/html5/liberalxmlparser.rb
vendored
Executable file
|
@ -0,0 +1,158 @@
|
|||
# Warning: this module is experimental and subject to change and even removal
|
||||
# at any time.
|
||||
#
|
||||
# For background/rationale, see:
|
||||
# * http://www.intertwingly.net/blog/2007/01/08/Xhtml5lib
|
||||
# * http://tinyurl.com/ylfj8k (and follow-ups)
|
||||
#
|
||||
# References:
|
||||
# * http://googlereader.blogspot.com/2005/12/xml-errors-in-feeds.html
|
||||
# * http://wiki.whatwg.org/wiki/HtmlVsXhtml
|
||||
#
|
||||
# @@TODO:
|
||||
# * Selectively lowercase only XHTML, but not foreign markup
|
||||
require 'html5/html5parser'
|
||||
require 'html5/constants'
|
||||
|
||||
module HTML5
|
||||
|
||||
# liberal XML parser
|
||||
class XMLParser < HTMLParser

  def initialize(options = {})
    super options
    @phases[:initial] = XmlRootPhase.new(self, @tree)
  end

  # Adjusts a tokenizer token before it is dispatched to the current phase
  # and returns it.
  def normalize_token(token)
    case token[:type]
    when :StartTag, :EmptyTag
      # We need to remove the duplicate attributes and convert attributes
      # to a Hash so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}
      # (the list is reversed so that the first occurrence wins)

      token[:data] = Hash[*token[:data].reverse.flatten]

      # For EmptyTags, process both a Start and an End tag
      if token[:type] == :EmptyTag
        save = @tokenizer.content_model_flag
        @phase.processStartTag(token[:name], token[:data])
        @tokenizer.content_model_flag = save
        token[:data] = {}
        token[:type] = :EndTag
      end

    when :Characters
      # un-escape RCDATA_ELEMENTS (e.g. style, script)
      if @tokenizer.content_model_flag == :CDATA
        token[:data] = token[:data].
          gsub('&lt;','<').gsub('&gt;','>').gsub('&amp;','&')
      end

    when :EndTag
      # Only a non-empty attribute collection is an error; an empty one is
      # truthy in Ruby and must not be reported.
      if token[:data] and not token[:data].empty?
        parse_error("attributes-in-end-tag")
      end

    when :Comment
      # Rescue CDATA from the comments
      if token[:data][0..6] == "[CDATA[" and token[:data][-2..-1] == "]]"
        token[:type] = :Characters
        token[:data] = token[:data][7 ... -2]
      end
    end

    return token
  end
end
|
||||
|
||||
# liberal XHTML parser
|
||||
class XHTMLParser < XMLParser

  def initialize(options = {})
    super options
    @phases[:initial] = InitialPhase.new(self, @tree)
    @phases[:rootElement] = XhmlRootPhase.new(self, @tree)
  end

  # Applies the XMLParser normalisation, then adjusts end tags for XHTML,
  # and returns the token.
  def normalize_token(token)
    super(token)

    # ensure that non-void XHTML elements have content so that separate
    # open and close tags are emitted
    if token[:type] == :EndTag
      if VOID_ELEMENTS.include? token[:name]
        # Tokens are keyed by Symbol throughout, so :name and :data are
        # used here as well.
        if @tree.open_elements[-1].name != token[:name]
          token[:type] = :EmptyTag
          token[:data] ||= {}
        end
      else
        if token[:name] == @tree.open_elements[-1].name and \
          not @tree.open_elements[-1].hasContent
          @tree.insertText('') unless
            @tree.open_elements.any? {|e|
              e.attributes.keys.include? 'xmlns' and
              e.attributes['xmlns'] != 'http://www.w3.org/1999/xhtml'
            }
        end
      end
    end

    return token
  end
end
|
||||
|
||||
class XhmlRootPhase < RootElementPhase
  # Creates the <html> root carrying the XHTML namespace, makes it both the
  # innermost open element and a child of the document, then moves the
  # parser on to its :beforeHead phase.
  def insert_html_element
    xhtml_namespace = {'xmlns' => 'http://www.w3.org/1999/xhtml'}
    root = @tree.createElement("html", xhtml_namespace)
    @tree.open_elements.push(root)
    @tree.document.appendChild(root)
    @parser.phase = @parser.phases[:beforeHead]
  end
end
|
||||
|
||||
class XmlRootPhase < Phase
  # Prime the Xml parser: every tag name maps to the generic handlers
  @start_tag_handlers = Hash.new(:startTagOther)
  @end_tag_handlers = Hash.new(:endTagOther)

  # First start tag of the document: the document goes onto the stack of
  # open elements, the new element is attached to it and pushed as well,
  # and a fresh XmlElementPhase takes over.
  def startTagOther(name, attributes)
    open = @tree.open_elements
    open.push(@tree.document)
    root = @tree.createElement(name, attributes)
    open[-1].appendChild(root)
    open.push(root)
    @parser.phase = XmlElementPhase.new(@parser,@tree)
  end

  # Runs the inherited handling, then pops the innermost open element.
  def endTagOther(name)
    super
    @tree.open_elements.pop
  end
end
|
||||
|
||||
class XmlElementPhase < Phase
  # Generic handling for all XML elements

  @start_tag_handlers = Hash.new(:startTagOther)
  @end_tag_handlers = Hash.new(:endTagOther)

  # Creates the element, attaches it to the innermost open element and
  # makes it the new innermost one.
  def startTagOther(name, attributes)
    created = @tree.createElement(name, attributes)
    @tree.open_elements[-1].appendChild(created)
    @tree.open_elements.push(created)
  end

  # Searches the open elements from the innermost outwards for one called
  # +name+ and pops the stack down to and including it. Each element passed
  # over on the way is reported as a parse error.
  def endTagOther(name)
    # Iterate over a reversed copy: the stack itself shrinks below.
    @tree.open_elements.reverse.each do |candidate|
      if candidate.name == name
        nil until @tree.open_elements.pop == candidate
        break
      else
        parse_error
      end
    end
  end

  def processCharacters(data)
    @tree.insertText(data)
  end
end
|
||||
|
||||
end
|
203
attic/vendor/plugins/HTML5lib/lib/html5/sanitizer.rb
vendored
Normal file
203
attic/vendor/plugins/HTML5lib/lib/html5/sanitizer.rb
vendored
Normal file
|
@ -0,0 +1,203 @@
|
|||
require 'cgi'
|
||||
require 'html5/tokenizer'
|
||||
require 'set'
|
||||
|
||||
module HTML5
|
||||
|
||||
# This module provides sanitization of XHTML+MathML+SVG
|
||||
# and of inline style attributes.
|
||||
#
|
||||
# It can be either at the Tokenizer stage:
|
||||
#
|
||||
# HTMLParser.parse(html, :tokenizer => HTMLSanitizer)
|
||||
#
|
||||
# or, if you already have a parse tree (in this example, a REXML tree),
|
||||
# at the Serializer stage:
|
||||
#
|
||||
# tokens = TreeWalkers.get_tree_walker('rexml').new(tree)
|
||||
# HTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
|
||||
# :sanitize => true})
|
||||
|
||||
module HTMLSanitizeModule

  ACCEPTABLE_ELEMENTS = Set.new %w[a abbr acronym address area audio b big blockquote br
  button caption center cite code col colgroup dd del dfn dir div dl dt
  em fieldset font form h1 h2 h3 h4 h5 h6 hr i img input ins kbd label
  legend li map menu ol optgroup option p pre q s samp select small span
  strike strong sub sup table tbody td textarea tfoot th thead tr tt u
  ul var video]

  MATHML_ELEMENTS = Set.new %w[annotation annotation-xml maction math merror mfrac
  mfenced mi mmultiscripts mn mo mover mpadded mphantom mprescripts mroot mrow
  mspace msqrt mstyle msub msubsup msup mtable mtd mtext mtr munder
  munderover none semantics]

  SVG_ELEMENTS = Set.new %w[a animate animateColor animateMotion animateTransform
  circle clipPath defs desc ellipse font-face font-face-name font-face-src
  foreignObject g glyph hkern linearGradient line marker metadata
  missing-glyph mpath path polygon polyline radialGradient rect set
  stop svg switch text title tspan use]

  ACCEPTABLE_ATTRIBUTES = Set.new %w[abbr accept accept-charset accesskey action
  align alt axis border cellpadding cellspacing char charoff charset
  checked cite class clear cols colspan color compact controls coords datetime
  dir disabled enctype for frame headers height href hreflang hspace id
  ismap label lang longdesc loop maxlength media method multiple name nohref
  noshade nowrap poster prompt readonly rel rev rows rowspan rules scope
  selected shape size span src start style summary tabindex target title
  type usemap valign value vspace width xml:lang]

  MATHML_ATTRIBUTES = Set.new %w[actiontype align close columnalign columnalign
  columnalign columnlines columnspacing columnspan depth display
  displaystyle encoding equalcolumns equalrows fence fontstyle fontweight
  frame height linethickness lspace mathbackground mathcolor mathvariant
  mathvariant maxsize minsize open other rowalign rowalign rowalign rowlines
  rowspacing rowspan rspace scriptlevel selection separator separators
  stretchy width width xlink:href xlink:show xlink:type xmlns xmlns:xlink]

  SVG_ATTRIBUTES = Set.new %w[accent-height accumulate additive alphabetic
  arabic-form ascent attributeName attributeType baseProfile bbox begin
  by calcMode cap-height class clip-path clip-rule color color-rendering
  content cx cy d dx dy descent display dur end fill fill-opacity fill-rule
  font-family font-size font-stretch font-style font-variant font-weight from
  fx fy g1 g2 glyph-name gradientUnits hanging height horiz-adv-x horiz-origin-x
  id ideographic k keyPoints keySplines keyTimes lang marker-end
  marker-mid marker-start markerHeight markerUnits markerWidth
  mathematical max min name offset opacity orient origin
  overline-position overline-thickness panose-1 path pathLength points
  preserveAspectRatio r refX refY repeatCount repeatDur
  requiredExtensions requiredFeatures restart rotate rx ry slope stemh
  stemv stop-color stop-opacity strikethrough-position
  strikethrough-thickness stroke stroke-dasharray stroke-dashoffset
  stroke-linecap stroke-linejoin stroke-miterlimit stroke-opacity
  stroke-width systemLanguage target text-anchor to transform type u1
  u2 underline-position underline-thickness unicode unicode-range
  units-per-em values version viewBox visibility width widths x
  x-height x1 x2 xlink:actuate xlink:arcrole xlink:href xlink:role
  xlink:show xlink:title xlink:type xml:base xml:lang xml:space xmlns
  xmlns:xlink y y1 y2 zoomAndPan]

  ATTR_VAL_IS_URI = Set.new %w[href src cite action longdesc xlink:href xml:base]

  SVG_ATTR_VAL_ALLOWS_REF = Set.new %w[clip-path color-profile cursor fill
  filter marker marker-start marker-mid marker-end mask stroke]

  SVG_ALLOW_LOCAL_HREF = Set.new %w[altGlyph animate animateColor animateMotion
  animateTransform cursor feImage filter linearGradient pattern
  radialGradient textpath tref set use]

  ACCEPTABLE_CSS_PROPERTIES = Set.new %w[azimuth background-color
  border-bottom-color border-collapse border-color border-left-color
  border-right-color border-top-color clear color cursor direction
  display elevation float font font-family font-size font-style
  font-variant font-weight height letter-spacing line-height overflow
  pause pause-after pause-before pitch pitch-range richness speak
  speak-header speak-numeral speak-punctuation speech-rate stress
  text-align text-decoration text-indent unicode-bidi vertical-align
  voice-family volume white-space width]

  ACCEPTABLE_CSS_KEYWORDS = Set.new %w[auto aqua black block blue bold both bottom
  brown center collapse dashed dotted fuchsia gray green !important
  italic left lime maroon medium none navy normal nowrap olive pointer
  purple red right solid silver teal top transparent underline white
  yellow]

  ACCEPTABLE_SVG_PROPERTIES = Set.new %w[fill fill-opacity fill-rule stroke
  stroke-width stroke-linecap stroke-linejoin stroke-opacity]

  ACCEPTABLE_PROTOCOLS = Set.new %w[ed2k ftp http https irc mailto news gopher nntp
  telnet webcal xmpp callto feed urn aim rsync tag ssh sftp rtsp afs]

  # subclasses may define their own versions of these constants
  ALLOWED_ELEMENTS = ACCEPTABLE_ELEMENTS + MATHML_ELEMENTS + SVG_ELEMENTS
  ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES
  ALLOWED_CSS_PROPERTIES = ACCEPTABLE_CSS_PROPERTIES
  ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS
  ALLOWED_SVG_PROPERTIES = ACCEPTABLE_SVG_PROPERTIES
  ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS

  # Sanitizes one tokenizer token (a Hash with :type and, for tags, :name
  # and :data) and returns it.
  #
  # * Tags whose name is in ALLOWED_ELEMENTS keep only attributes from
  #   ALLOWED_ATTRIBUTES; URI attributes with a scheme outside
  #   ALLOWED_PROTOCOLS are dropped, non-local url(...) references and
  #   non-local xlink:href values are removed, and style is passed through
  #   sanitize_css.
  # * Any other tag is turned into a :Characters token holding the tag's
  #   markup as text.
  # * Comments are emptied.
  # * Everything else is returned untouched.
  def sanitize_token(token)
    case token[:type]
    when :StartTag, :EndTag, :EmptyTag
      if self.class.const_get("ALLOWED_ELEMENTS").include?(token[:name])
        if token.has_key? :data
          attrs = Hash[*token[:data].flatten]
          attrs.delete_if { |attr,v| !self.class.const_get("ALLOWED_ATTRIBUTES").include?(attr) }
          ATTR_VAL_IS_URI.each do |attr|
            val_unescaped = CGI.unescapeHTML(attrs[attr].to_s)
            # Scrub on a binary copy: the pattern names raw bytes (control
            # characters, and the two-byte sequences \302\200-\302\240),
            # which only matches reliably when text and pattern are both
            # treated as bytes.
            if val_unescaped.respond_to?(:force_encoding)
              val_unescaped = val_unescaped.dup.force_encoding('BINARY')
            end
            val_unescaped = val_unescaped.gsub(/`|[\000-\040\177\s]+|\302[\200-\240]/n,'').downcase
            if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ and !self.class.const_get("ALLOWED_PROTOCOLS").include?(val_unescaped.split(':')[0])
              attrs.delete attr
            end
          end
          SVG_ATTR_VAL_ALLOWS_REF.each do |attr|
            attrs[attr] = attrs[attr].to_s.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attrs[attr]
          end
          if SVG_ALLOW_LOCAL_HREF.include?(token[:name]) && attrs['xlink:href'] && attrs['xlink:href'] =~ /^\s*[^#\s].*/m
            attrs.delete 'xlink:href'
          end
          if attrs['style']
            attrs['style'] = sanitize_css(attrs['style'])
          end
          token[:data] = attrs.map {|k,v| [k,v]}
        end
        return token
      else
        # Not allowed: emit the tag's markup as character data instead
        if token[:type] == :EndTag
          token[:data] = "</#{token[:name]}>"
        elsif token[:data]
          attrs = token[:data].map {|k,v| " #{k}=\"#{CGI.escapeHTML(v)}\""}.join('')
          token[:data] = "<#{token[:name]}#{attrs}>"
        else
          token[:data] = "<#{token[:name]}>"
        end
        token[:data].insert(-2,'/') if token[:type] == :EmptyTag
        token[:type] = :Characters
        token.delete(:name)
        return token
      end
    when :Comment
      token[:data] = ""
      return token
    else
      return token
    end
  end

  # Returns a cleaned copy of an inline style string: url(...) values are
  # removed, anything that does not look like a plain list of declarations
  # yields '', and only declarations for allowed properties (or for
  # background/border/margin/padding shorthands built from allowed
  # keywords, colours and lengths) are kept.
  def sanitize_css(style)
    # disallow urls
    style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')

    # gauntlet
    return '' unless style =~ /^([-:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*$/
    return '' unless style =~ /^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$/

    clean = []
    style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop, val|
      next if val.empty?
      prop.downcase!
      if self.class.const_get("ALLOWED_CSS_PROPERTIES").include?(prop)
        clean << "#{prop}: #{val};"
      elsif %w[background border margin padding].include?(prop.split('-')[0])
        clean << "#{prop}: #{val};" unless val.split().any? do |keyword|
          !self.class.const_get("ALLOWED_CSS_KEYWORDS").include?(keyword) and
          keyword !~ /^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$/
        end
      elsif self.class.const_get("ALLOWED_SVG_PROPERTIES").include?(prop)
        clean << "#{prop}: #{val};"
      end
    end

    style = clean.join(' ')
  end
end
|
||||
|
||||
# Tokenizer variant that runs every token through sanitize_token (mixed in
# from HTMLSanitizeModule) before handing it to the caller's block.
class HTMLSanitizer < HTMLTokenizer
  include HTMLSanitizeModule

  # Iterates over the tokens produced by HTMLTokenizer#each, yielding the
  # sanitized form of each one.
  def each
    super { |token| yield(sanitize_token(token)) }
  end
end
|
||||
|
||||
end
|
2
attic/vendor/plugins/HTML5lib/lib/html5/serializer.rb
vendored
Normal file
2
attic/vendor/plugins/HTML5lib/lib/html5/serializer.rb
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
require 'html5/serializer/htmlserializer'
|
||||
require 'html5/serializer/xhtmlserializer'
|
179
attic/vendor/plugins/HTML5lib/lib/html5/serializer/htmlserializer.rb
vendored
Normal file
179
attic/vendor/plugins/HTML5lib/lib/html5/serializer/htmlserializer.rb
vendored
Normal file
|
@ -0,0 +1,179 @@
|
|||
require 'html5/constants'
|
||||
|
||||
module HTML5
|
||||
|
||||
# Turns a stream of token hashes (each with a :type key, plus :name / :data
# depending on the type) into HTML text.
#
# Behaviour is controlled by the instance variables set in #initialize; any
# option whose name matches one of them overrides the default.
class HTMLSerializer

  # Builds a serializer from +options+ and serializes +stream+, using
  # options[:encoding] as the output encoding.
  def self.serialize(stream, options = {})
    new(options).serialize(stream, options[:encoding])
  end

  # Escapes the characters that are significant in HTML text content.
  def escape(string)
    string.gsub("&", "&amp;").gsub("<", "&lt;").gsub(">", "&gt;")
  end

  # options:: Hash of settings; keys that do not correspond to one of the
  #           defaults below are ignored. Passing :quote_char disables the
  #           automatic choice of the best quote character.
  def initialize(options={})
    @quote_attr_values = false
    @quote_char = '"'
    @use_best_quote_char = true
    @minimize_boolean_attributes = true

    @use_trailing_solidus = false
    @space_before_trailing_solidus = true
    @escape_lt_in_attrs = false
    @escape_rcdata = false

    @omit_optional_tags = true
    @sanitize = false

    @strip_whitespace = false

    @inject_meta_charset = true

    # Must exist before the loop below, otherwise a :strict option is
    # silently dropped and serialize_error can never raise.
    @strict = false

    options.each do |name, value|
      next unless instance_variable_defined?("@#{name}")
      @use_best_quote_char = false if name.to_s == 'quote_char'
      instance_variable_set("@#{name}", value)
    end

    @errors = []
  end

  # Serializes every token yielded by +treewalker+ and returns the resulting
  # string. The enabled filters are wrapped around the walker first. When
  # +encoding+ is given and is not 'utf-8', the result is transcoded from
  # UTF-8 to that encoding.
  #
  # Problems found on the way are recorded via #serialize_error.
  def serialize(treewalker, encoding=nil)
    in_cdata = false
    @errors = []

    if encoding and @inject_meta_charset
      require 'html5/filters/inject_meta_charset'
      treewalker = Filters::InjectMetaCharset.new(treewalker, encoding)
    end

    if @strip_whitespace
      require 'html5/filters/whitespace'
      treewalker = Filters::WhitespaceFilter.new(treewalker)
    end

    if @sanitize
      require 'html5/filters/sanitizer'
      treewalker = Filters::HTMLSanitizeFilter.new(treewalker)
    end

    if @omit_optional_tags
      require 'html5/filters/optionaltags'
      treewalker = Filters::OptionalTagFilter.new(treewalker)
    end

    result = []
    treewalker.each do |token|
      case token[:type]
      when :Doctype
        result << ("<!DOCTYPE %s>" % token[:name])

      when :Characters, :SpaceCharacters
        if token[:type] == :SpaceCharacters or in_cdata
          # Whitespace and the contents of RCDATA elements are emitted raw.
          if in_cdata and token[:data].include?("</")
            serialize_error("Unexpected </ in CDATA")
          end
          result << token[:data]
        else
          result << escape(token[:data])
        end

      when :StartTag, :EmptyTag
        name = token[:name]
        if RCDATA_ELEMENTS.include?(name) and not @escape_rcdata
          in_cdata = true
        elsif in_cdata
          serialize_error("Unexpected child element of a CDATA element")
        end
        attributes = serialize_attributes(name, token[:data])
        if VOID_ELEMENTS.include?(name) and @use_trailing_solidus
          attributes += @space_before_trailing_solidus ? " /" : "/"
        end
        result << ("<%s%s>" % [name, attributes])

      when :EndTag
        name = token[:name]
        if RCDATA_ELEMENTS.include?(name)
          in_cdata = false
        elsif in_cdata
          serialize_error("Unexpected child element of a CDATA element")
        end
        result << "</#{name}>"

      when :Comment
        serialize_error("Comment contains --") if token[:data].index("--")
        result << ("<!--%s-->" % token[:data])

      else
        serialize_error(token[:data])
      end
    end

    output = result.join('')
    if encoding and encoding != 'utf-8'
      transcode(output, encoding)
    else
      output
    end
  end

  alias :render :serialize

  # Records +data+ in the error list; raises SerializeError when the
  # serializer was created with :strict => true.
  def serialize_error(data="XXX ERROR MESSAGE NEEDED")
    # XXX The idea is to make data mandatory.
    @errors.push(data)
    raise SerializeError if @strict
  end

  private

  # Returns the attribute markup (including the leading space of each
  # attribute) for element +name+. +attrs+ is a Hash or an Array of
  # [name, value] pairs; attributes are emitted in sorted order.
  def serialize_attributes(name, attrs)
    out = []
    attrs.to_a.sort.each do |k, v|
      out << ' ' << k

      # Boolean attributes are written without a value when minimizing.
      next if @minimize_boolean_attributes &&
              ((BOOLEAN_ATTRIBUTES[name] || []).include?(k) ||
               BOOLEAN_ATTRIBUTES[:global].include?(k))

      out << "="
      quote_attr = @quote_attr_values || v.empty? ||
                   (SPACE_CHARACTERS + %w(< > " ')).any? { |c| v.include?(c) }

      v = v.gsub("&", "&amp;")
      v = v.gsub("<", "&lt;") if @escape_lt_in_attrs

      unless quote_attr
        out << v
        next
      end

      quote_char = @quote_char
      if @use_best_quote_char
        # Prefer the quote character that does not occur in the value.
        if v.index("'") and !v.index('"')
          quote_char = '"'
        elsif v.index('"') and !v.index("'")
          quote_char = "'"
        end
      end
      v = quote_char == "'" ? v.gsub("'", "&#39;") : v.gsub('"', "&quot;")
      out << quote_char << v << quote_char
    end
    out.join('')
  end

  # Converts +utf8_text+ from UTF-8 to +encoding+. Iconv is used where it is
  # available (as before); Rubies that no longer ship it fall back to
  # String#encode.
  def transcode(utf8_text, encoding)
    require 'iconv'
    Iconv.iconv(encoding, 'utf-8', utf8_text).first
  rescue LoadError
    utf8_text.encode(encoding, 'utf-8')
  end

end
|
||||
|
||||
# Raised by HTMLSerializer#serialize_error when the serializer is strict.
# It derives directly from Exception, so a bare `rescue` does not catch it.
class SerializeError < Exception; end
|
||||
end
|
20
attic/vendor/plugins/HTML5lib/lib/html5/serializer/xhtmlserializer.rb
vendored
Normal file
20
attic/vendor/plugins/HTML5lib/lib/html5/serializer/xhtmlserializer.rb
vendored
Normal file
|
@ -0,0 +1,20 @@
|
|||
require 'html5/serializer/htmlserializer'
|
||||
|
||||
module HTML5
|
||||
|
||||
# HTMLSerializer preconfigured for XHTML-style output. Options passed by the
# caller take precedence over DEFAULTS.
class XHTMLSerializer < HTMLSerializer
  DEFAULTS = {
    :quote_attr_values => true,
    :minimize_boolean_attributes => false,
    :use_trailing_solidus => true,
    :escape_lt_in_attrs => true,
    :omit_optional_tags => false,
    :escape_rcdata => true
  }

  # options:: overrides merged on top of DEFAULTS (DEFAULTS is not modified).
  def initialize(options={})
    super(DEFAULTS.merge(options))
  end
end
|
||||
|
||||
end
|
45
attic/vendor/plugins/HTML5lib/lib/html5/sniffer.rb
vendored
Normal file
45
attic/vendor/plugins/HTML5lib/lib/html5/sniffer.rb
vendored
Normal file
|
@ -0,0 +1,45 @@
|
|||
module HTML5
  module Sniffer
    # 4.7.4
    #
    # Decides whether +str+ is a feed or HTML by scanning at most its first
    # 512 characters. Leading whitespace, comments, <!...> declarations and
    # <?...?> processing instructions are skipped; the first other tag
    # decides the result.
    #
    # Returns "application/rss+xml", "application/atom+xml" or "text/html".
    # Raises NotImplementedError when the first tag is rdf:RDF.
    def html_or_feed str
      head = str[0, 512] # steps 1, 2
      i = 0

      while i < head.length
        case head[i]
        when ?\t, ?\ , ?\n, ?\r # 0x09, 0x20, 0x0A, 0x0D == tab, space, LF, CR
          i += 1
        when ?< # 0x3C
          i += 1
          if head[i..i+2] == "!--" # [0x21, 0x2D, 0x2D]
            # Comment: skip to just past the closing "-->".
            i += 3
            i += 1 until head[i..i+2] == "-->" || i >= head.length
            i += 3
          elsif head[i] == ?! # 0x21
            # Declaration: skip to just past the next ">".
            i += 1
            i += 1 until head[i] == ?> || i >= head.length # 0x3E
            i += 1
          elsif head[i] == ?? # 0x3F
            # Processing instruction: skip to just past "?>".
            i += 1 until head[i..i+1] == "?>" || i >= head.length # [0x3F, 0x3E]
            i += 2
          elsif head[i..i+2] == "rss" # [0x72, 0x73, 0x73]
            return "application/rss+xml"
          elsif head[i..i+3] == "feed" # [0x66, 0x65, 0x65, 0x64]
            return "application/atom+xml"
          elsif head[i..i+6] == "rdf:RDF" # [0x72, 0x64, 0x66, 0x3A, 0x52, 0x44, 0x46]
            raise NotImplementedError
          end
        else
          break
        end
      end

      "text/html"
    end
  end
end
|
970
attic/vendor/plugins/HTML5lib/lib/html5/tokenizer.rb
vendored
Normal file
970
attic/vendor/plugins/HTML5lib/lib/html5/tokenizer.rb
vendored
Normal file
|
@ -0,0 +1,970 @@
|
|||
require 'html5/constants'
|
||||
require 'html5/inputstream'
|
||||
|
||||
module HTML5
|
||||
|
||||
# This class takes care of tokenizing HTML.
|
||||
#
|
||||
# * @current_token
|
||||
# Holds the token that is currently being processed.
|
||||
#
|
||||
# * @state
|
||||
# Holds a reference to the method to be invoked... XXX
|
||||
#
|
||||
# * @states
|
||||
# Holds a mapping between states and methods that implement the state.
|
||||
#
|
||||
# * @stream
|
||||
# Points to HTMLInputStream object.
|
||||
|
||||
class HTMLTokenizer
|
||||
attr_accessor :content_model_flag, :current_token
|
||||
attr_reader :stream
|
||||
|
||||
# XXX need to fix documentation
|
||||
|
||||
# stream::  the input, handed together with +options+ to HTMLInputStream.new
# options:: :lowercase_element_name and :lowercase_attr_name are read here;
#           each counts as enabled unless it is explicitly false.
def initialize(stream, options = {})
  @stream = HTMLInputStream.new(stream, options)

  # Initial tokenizer state.
  @content_model_flag = :PCDATA
  @state = :data_state
  @escapeFlag, @lastFourChars = false, []

  # Token under construction, and tokens waiting to be yielded by #each.
  @current_token = nil
  @token_queue = []

  @lowercase_element_name = (options[:lowercase_element_name] != false)
  @lowercase_attr_name = (options[:lowercase_attr_name] != false)
end
|
||||
|
||||
# This is where the magic happens.
|
||||
#
|
||||
# We do our usual processing through the states and when we have a token
|
||||
# to return we yield the token which pauses processing until the next token
|
||||
# is requested.
|
||||
# Runs the state machine, yielding tokens to the caller's block.
#
# The method named by @state is invoked repeatedly for as long as it returns
# a true value. After each step, pending stream errors are yielded as
# :ParseError tokens, followed by everything queued in @token_queue.
def each
  @token_queue = []
  # Start processing. When EOF is reached @state will return false
  # instead of true and the loop will terminate.
  while send(@state)
    until @stream.errors.empty?
      yield({:type => :ParseError, :data => @stream.errors.shift})
    end
    until @token_queue.empty?
      yield @token_queue.shift
    end
  end
end
|
||||
|
||||
# Below are various helper functions the tokenizer states use worked out.
|
||||
|
||||
# If the next character is a '>', convert the current_token into
|
||||
# an EmptyTag
|
||||
|
||||
# Handles a "/" seen inside a tag: when the current token is a start tag and
# the next character is ">", the token becomes an :EmptyTag; otherwise a
# parse error is queued. The peeked character is always put back.
def process_solidus_in_tag
  # Look one character ahead to see whether the solidus closes the tag.
  peeked = @stream.char

  if @current_token[:type] == :StartTag && peeked == ">"
    @current_token[:type] = :EmptyTag
  else
    @token_queue << {:type => :ParseError, :data => "incorrectly-placed-solidus"}
  end

  # Return the character to the stream so the calling state still sees it.
  @stream.unget(peeked)
end
|
||||
|
||||
# This function returns either U+FFFD or the character based on the
|
||||
# decimal or hexadecimal representation. It also discards ";" if present.
|
||||
# If not present @token_queue << {:type => :ParseError}" is invoked.
|
||||
|
||||
# Reads the digits of a numeric character reference from the stream and
# returns the character it denotes.
#
# isHex:: true to read hexadecimal digits, otherwise decimal.
#
# Code point 13 is mapped to 10 and 128..159 are translated through
# ENTITIES_WINDOWS1252, each with a parse error. Values that are zero, above
# 1114111 or in 55296..57343 yield U+FFFD plus a parse error. A missing
# trailing ";" is reported and the terminating character is put back.
def consume_number_entity(isHex)
  # XXX More need to be done here. For instance, #13 should prolly be
  # converted to #10 so we don't get \r (#13 is \r right?) in the DOM and
  # such. Thoughts on this appreciated.
  allowed, radix = isHex ? [HEX_DIGITS, 16] : [DIGITS, 10]

  # Collect every digit in range; EOF ends the run as well.
  digits = []
  c = @stream.char
  while allowed.include?(c) && c != :EOF
    digits << c
    c = @stream.char
  end

  code_point = digits.join('').to_i(radix)

  if code_point == 13
    @token_queue << {:type => :ParseError, :data => "incorrect-cr-newline-entity"}
    code_point = 10
  elsif (128..159).include? code_point
    # Values 128..159 get the "windows trick".
    @token_queue << {:type => :ParseError, :data => "illegal-windows-1252-entity"}
    code_point = ENTITIES_WINDOWS1252[code_point - 128]
  end

  if 0 < code_point && code_point <= 1114111 && !(55296 <= code_point && code_point <= 57343)
    # Integer#chr with an encoding argument is used where strings are
    # encoding-aware; otherwise the character is packed as UTF-8.
    char = if String.method_defined? :force_encoding
             code_point.chr('utf-8')
           else
             [code_point].pack('U')
           end
  else
    char = [0xFFFD].pack('U')
    @token_queue << {:type => :ParseError, :data => "cant-convert-numeric-entity", :datavars => {"charAsInt" => code_point}}
  end

  # Discard the ";" if present; anything else goes back to the stream.
  if c != ";"
    @token_queue << {:type => :ParseError, :data => "numeric-entity-without-semicolon"}
    @stream.unget(c)
  end

  char
end
|
||||
|
||||
# Tries to consume a character reference; the leading "&" has already been
# read by the caller.
#
# from_attribute:: true when the reference occurs inside an attribute value.
#                  In that case a named entity without a trailing ";" that
#                  is directly followed by a letter or digit is not decoded;
#                  everything is put back and "&" is returned.
#
# Returns the replacement text, or nil when no entity was recognised (all
# consumed characters are then returned to the stream).
#
# The "does the name end in ';'" checks use String#[-1, 1], which yields a
# one-character String on both Ruby 1.8 and 1.9; comparing String#[-1] with
# ";" is always unequal on 1.8, where it returns a Fixnum.
def consume_entity(from_attribute=false)
  char = nil
  char_stack = [@stream.char]
  if SPACE_CHARACTERS.include?(char_stack[0]) || [:EOF, '<', '&'].include?(char_stack[0])
    @stream.unget(char_stack)
  elsif char_stack[0] == '#'
    # We might have a number entity here.
    char_stack += [@stream.char, @stream.char]
    if char_stack[0 .. 1].include? :EOF
      # If we reach the end of the file put everything up to :EOF
      # back in the queue
      char_stack = char_stack[0...char_stack.index(:EOF)]
      @stream.unget(char_stack)
      @token_queue << {:type => :ParseError, :data => "expected-numeric-entity-but-got-eof"}
    elsif char_stack[1].downcase == "x" && HEX_DIGITS.include?(char_stack[2])
      # Hexadecimal entity detected.
      @stream.unget(char_stack[2])
      char = consume_number_entity(true)
    elsif DIGITS.include? char_stack[1]
      # Decimal entity detected.
      @stream.unget(char_stack[1..-1])
      char = consume_number_entity(false)
    else
      # No number entity detected.
      @stream.unget(char_stack)
      @token_queue << {:type => :ParseError, :data => "expected-numeric-entity"}
    end
  else
    # At this point we might have a named entity. Consume characters for as
    # long as they are a prefix of some name in ENTITIES.
    candidates = ENTITIES.keys
    candidates.reject! { |e| e[0, 1] != char_stack[0] }
    entity_name = nil

    # Try to find the longest entity the string will match to take care
    # of &noti for instance.
    while char_stack.last != :EOF
      name = char_stack.join('')
      break unless candidates.any? { |e| e[0...name.length] == name }
      candidates.reject! { |e| e[0...name.length] != name }
      char_stack.push(@stream.char)

      if ENTITIES.include? name
        entity_name = name
        break if entity_name[-1, 1] == ';'
      end
    end

    if entity_name != nil
      char = ENTITIES[entity_name]
      terminated = entity_name[-1, 1] == ';'

      unless terminated
        @token_queue << {:type => :ParseError, :data => "named-entity-without-semicolon"}
      end

      # Character directly after the matched name (nil when there is none).
      following = char_stack[entity_name.length]
      if !terminated && from_attribute &&
         (ASCII_LETTERS.include?(following) || DIGITS.include?(following))
        @stream.unget(char_stack)
        char = '&'
      else
        # Only the characters read beyond the entity name go back.
        @stream.unget(char_stack[entity_name.length..-1])
      end
    else
      @token_queue << {:type => :ParseError, :data => "expected-named-entity"}
      @stream.unget(char_stack)
    end
  end
  return char
end
|
||||
|
||||
# This method replaces the need for "entityInAttributeValueState".
|
||||
# Consumes a character reference inside an attribute value and appends the
# result (or a literal "&" when nothing was recognised) to the value of the
# attribute currently being built.
def process_entity_in_attribute
  # from_attribute must be true here: this is the only caller that is inside
  # an attribute value, and without the flag consume_entity's
  # attribute-specific handling of unterminated named entities never runs.
  entity = consume_entity(true)
  @current_token[:data][-1][1] += entity || "&"
end
|
||||
|
||||
# This method is a generic handler for emitting the tags. It also sets
|
||||
# the state to "data" because that's what's needed after a token has been
|
||||
# emitted.
|
||||
# Queues @current_token for emission and switches back to the data state.
# Only :StartTag, :EndTag and :EmptyTag tokens are handled; for any other
# token type the method does nothing. The tag name is lower-cased first when
# @lowercase_element_name is set.
def emit_current_token
  token = @current_token
  return unless [:StartTag, :EndTag, :EmptyTag].include?(token[:type])

  token[:name] = token[:name].downcase if @lowercase_element_name
  @token_queue << token
  @state = :data_state
end
|
||||
|
||||
# Below are the various tokenizer states worked out.
|
||||
|
||||
# XXX AT Perhaps we should have Hixie run some evaluation on billions of
|
||||
# documents to figure out what the order of the various if and elsif
|
||||
# statements should be.
|
||||
# Data state: consumes one character and decides what follows.
#
# In CDATA/RCDATA content the last four characters are tracked so that
# "<!--" turns the escape flag on and "-->" turns it off. Returns false at
# EOF (which ends tokenization) and true otherwise.
def data_state
  c = @stream.char

  if @content_model_flag == :CDATA || @content_model_flag == :RCDATA
    @lastFourChars << c
    @lastFourChars.shift if @lastFourChars.length > 4
  end

  if c == "&" && [:PCDATA, :RCDATA].include?(@content_model_flag) && !@escapeFlag
    @state = :entity_data_state
  elsif c == "-" && [:CDATA, :RCDATA].include?(@content_model_flag) && !@escapeFlag && @lastFourChars.join('') == "<!--"
    @escapeFlag = true
    @token_queue << {:type => :Characters, :data => c}
  elsif c == "<" && !@escapeFlag &&
        [:PCDATA, :CDATA, :RCDATA].include?(@content_model_flag)
    @state = :tag_open_state
  elsif c == ">" && @escapeFlag &&
        [:CDATA, :RCDATA].include?(@content_model_flag) &&
        @lastFourChars[1..-1].join('') == "-->"
    @escapeFlag = false
    @token_queue << {:type => :Characters, :data => c}
  elsif c == :EOF
    # Tokenization ends.
    return false
  elsif SPACE_CHARACTERS.include? c
    # Directly after emitting a token you switch back to the "data
    # state". At that point SPACE_CHARACTERS are important so they are
    # emitted separately.
    # XXX need to check if we don't need a special "spaces" flag on
    # characters.
    @token_queue << {:type => :SpaceCharacters, :data => c + @stream.chars_until(SPACE_CHARACTERS, true)}
  else
    # Plain text: take everything up to the next potentially special character.
    @token_queue << {:type => :Characters, :data => c + @stream.chars_until(%w[& < > -])}
  end

  true
end
|
||||
|
||||
# Entity data state: emits the text of a consumed character reference, or a
# literal "&" when none was recognised, then returns to the data state.
def entity_data_state
  text = consume_entity || "&"
  @token_queue << {:type => :Characters, :data => text}
  @state = :data_state
  true
end
|
||||
|
||||
# Tag open state: entered after "<" has been consumed. Always returns true.
def tag_open_state
  c = @stream.char

  unless @content_model_flag == :PCDATA
    # We know the content model flag is set to either RCDATA or CDATA
    # now because this state can never be entered with the PLAINTEXT
    # flag. Only "</" is interesting here.
    if c == "/"
      @state = :close_tag_open_state
    else
      @token_queue << {:type => :Characters, :data => "<"}
      @stream.unget(c)
      @state = :data_state
    end
    return true
  end

  if c == "!"
    @state = :markup_declaration_open_state
  elsif c == "/"
    @state = :close_tag_open_state
  elsif c != :EOF && ASCII_LETTERS.include?(c)
    @current_token = {:type => :StartTag, :name => c, :data => []}
    @state = :tag_name_state
  elsif c == ">"
    # XXX In theory it could be something besides a tag name. But
    # do we really care?
    @token_queue << {:type => :ParseError, :data => "expected-tag-name-but-got-right-bracket"}
    @token_queue << {:type => :Characters, :data => "<>"}
    @state = :data_state
  elsif c == "?"
    # XXX In theory it could be something besides a tag name. But
    # do we really care?
    @token_queue << {:type => :ParseError, :data => "expected-tag-name-but-got-question-mark"}
    @stream.unget(c)
    @state = :bogus_comment_state
  else
    # XXX
    @token_queue << {:type => :ParseError, :data => "expected-tag-name"}
    @token_queue << {:type => :Characters, :data => "<"}
    @stream.unget(c)
    @state = :data_state
  end

  true
end
|
||||
|
||||
# Close tag open state: entered after "</" has been consumed.
#
# In RCDATA/CDATA content the end tag only counts when it names the current
# token (compared case-insensitively) and is followed by whitespace, ">",
# "/", "<" or EOF; otherwise "</" is emitted as text. Always returns true.
def close_tag_open_state
  if [:RCDATA, :CDATA].include?(@content_model_flag)
    closes_current = false

    if @current_token
      # So far we know that "</" has been consumed. Peek at as many
      # characters as the current token's name is long, plus the one that
      # directly follows them.
      lookahead = []
      (@current_token[:name].length + 1).times do
        lookahead << @stream.char
        # Make sure we don't get hit by :EOF
        break if lookahead.last == :EOF
      end

      # This was only a check, so everything goes back to the stream.
      @stream.unget(lookahead)

      closes_current =
        @current_token[:name].downcase == lookahead[0...-1].join('').downcase &&
        (SPACE_CHARACTERS + [">", "/", "<", :EOF]).include?(lookahead.last)
    end

    unless closes_current
      @token_queue << {:type => :Characters, :data => "</"}
      @state = :data_state
      # Nothing below applies in this case.
      return true
    end

    # Because the characters are correct we can safely switch to
    # PCDATA mode now. This also means we don't have to do it when
    # emitting the end tag token.
    @content_model_flag = :PCDATA
  end

  c = @stream.char
  if c == :EOF
    @token_queue << {:type => :ParseError, :data => "expected-closing-tag-but-got-eof"}
    @token_queue << {:type => :Characters, :data => "</"}
    @state = :data_state
  elsif ASCII_LETTERS.include? c
    @current_token = {:type => :EndTag, :name => c, :data => []}
    @state = :tag_name_state
  elsif c == ">"
    @token_queue << {:type => :ParseError, :data => "expected-closing-tag-but-got-right-bracket"}
    @state = :data_state
  else
    # XXX data can be _'_...
    @token_queue << {:type => :ParseError, :data => "expected-closing-tag-but-got-char", :datavars => {:data => c}}
    @stream.unget(c)
    @state = :bogus_comment_state
  end

  true
end
|
||||
|
||||
# Tag name state: extends the name of the current tag token until
# whitespace, ">", "/" or EOF is reached. Always returns true.
def tag_name_state
  c = @stream.char
  if SPACE_CHARACTERS.include? c
    @state = :before_attribute_name_state
  elsif c == :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-tag-name"}
    emit_current_token
  elsif ASCII_LETTERS.include? c
    # Take the whole run of letters in one go.
    @current_token[:name] = @current_token[:name] + (c + @stream.chars_until(ASCII_LETTERS, true))
  elsif c == ">"
    emit_current_token
  elsif c == "/"
    process_solidus_in_tag
    @state = :before_attribute_name_state
  else
    @current_token[:name] = @current_token[:name] + c
  end
  true
end
|
||||
|
||||
# Before attribute name state: skips whitespace and starts a new
# [name, value] pair on the current token when an attribute begins.
# Always returns true.
def before_attribute_name_state
  c = @stream.char
  if SPACE_CHARACTERS.include? c
    # Swallow the rest of the whitespace run.
    @stream.chars_until(SPACE_CHARACTERS, true)
  elsif c == :EOF
    @token_queue << {:type => :ParseError, :data => "expected-attribute-name-but-got-eof"}
    emit_current_token
  elsif ASCII_LETTERS.include? c
    @current_token[:data] << [c, ""]
    @state = :attribute_name_state
  elsif c == ">"
    emit_current_token
  elsif c == "/"
    process_solidus_in_tag
  else
    # Any other character also starts an attribute name.
    @current_token[:data] << [c, ""]
    @state = :attribute_name_state
  end
  true
end
|
||||
|
||||
# Attribute name state: extends the name of the attribute being built.
#
# When the name is complete it is lower-cased (if @lowercase_attr_name is
# set) and compared with the earlier attributes of the token; a duplicate
# is reported once as a parse error. Always returns true.
def attribute_name_state
  c = @stream.char
  name_complete = true
  emit = false

  if c == "="
    @state = :before_attribute_value_state
  elsif c == :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-attribute-name"}
    @state = :data_state
    emit = true
  elsif ASCII_LETTERS.include? c
    @current_token[:data][-1][0] += c + @stream.chars_until(ASCII_LETTERS, true)
    name_complete = false
  elsif c == ">"
    # XXX If we emit here the attributes are converted to a dict
    # without being checked and when the code below runs we error
    # because data is a dict not a list
    emit = true
  elsif SPACE_CHARACTERS.include? c
    @state = :after_attribute_name_state
  elsif c == "/"
    process_solidus_in_tag
    @state = :before_attribute_name_state
  else
    @current_token[:data][-1][0] += c
    name_complete = false
  end

  if name_complete
    # Attributes are not dropped at this stage. That happens when the
    # start tag token is emitted so values can still be safely appended
    # to attributes, but we do want to report the parse error in time.
    attrs = @current_token[:data]
    attrs[-1][0] = attrs.last.first.downcase if @lowercase_attr_name

    # any? stops at the first hit, so the error is reported only once.
    if attrs[0...-1].any? { |name, _value| attrs.last.first == name }
      @token_queue << {:type => :ParseError, :data => "duplicate-attribute"}
    end

    # XXX Fix for above XXX
    emit_current_token if emit
  end

  true
end
|
||||
|
||||
# After attribute name state: whitespace has followed an attribute name.
# Either a value follows ("="), the tag ends, or a new attribute starts.
# Always returns true.
def after_attribute_name_state
  c = @stream.char
  if SPACE_CHARACTERS.include? c
    @stream.chars_until(SPACE_CHARACTERS, true)
  elsif c == "="
    @state = :before_attribute_value_state
  elsif c == ">"
    emit_current_token
  elsif c == :EOF
    @token_queue << {:type => :ParseError, :data => "expected-end-of-tag-but-got-eof"}
    emit_current_token
  elsif ASCII_LETTERS.include? c
    @current_token[:data] << [c, ""]
    @state = :attribute_name_state
  elsif c == "/"
    process_solidus_in_tag
    @state = :before_attribute_name_state
  else
    # Any other character starts a new attribute name as well.
    @current_token[:data] << [c, ""]
    @state = :attribute_name_state
  end
  true
end
|
||||
|
||||
# Before attribute value state: "=" has been seen; works out how the value
# is quoted. Always returns true.
def before_attribute_value_state
  c = @stream.char
  if SPACE_CHARACTERS.include? c
    @stream.chars_until(SPACE_CHARACTERS, true)
  elsif c == "\""
    @state = :attribute_value_double_quoted_state
  elsif c == "&"
    # The "&" belongs to an unquoted value; let that state handle it.
    @state = :attribute_value_unquoted_state
    @stream.unget(c)
  elsif c == "'"
    @state = :attribute_value_single_quoted_state
  elsif c == ">"
    emit_current_token
  elsif c == :EOF
    @token_queue << {:type => :ParseError, :data => "expected-attribute-value-but-got-eof"}
    emit_current_token
  else
    @current_token[:data][-1][1] += c
    @state = :attribute_value_unquoted_state
  end
  true
end
|
||||
|
||||
# Attribute value (double-quoted) state: appends text to the current
# attribute's value until the closing '"'. Always returns true.
def attribute_value_double_quoted_state
  c = @stream.char
  if c == "\""
    @state = :before_attribute_name_state
  elsif c == "&"
    process_entity_in_attribute
  elsif c == :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-attribute-value-double-quote"}
    emit_current_token
  else
    # Take everything up to the next quote or entity in one go.
    @current_token[:data][-1][1] += c + @stream.chars_until(["\"", "&"])
  end
  true
end
|
||||
|
||||
# Attribute value (single-quoted) state: appends text to the current
# attribute's value until the closing "'". Always returns true.
def attribute_value_single_quoted_state
  c = @stream.char
  if c == "'"
    @state = :before_attribute_name_state
  elsif c == "&"
    process_entity_in_attribute
  elsif c == :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-attribute-value-single-quote"}
    emit_current_token
  else
    # Take everything up to the next quote or entity in one go.
    @current_token[:data][-1][1] += c + @stream.chars_until(["'", "&"])
  end
  true
end
|
||||
|
||||
# Attribute value (unquoted) state: appends text to the current attribute's
# value until whitespace or ">" ends it. Always returns true.
def attribute_value_unquoted_state
  c = @stream.char
  if SPACE_CHARACTERS.include? c
    @state = :before_attribute_name_state
  elsif c == "&"
    process_entity_in_attribute
  elsif c == ">"
    emit_current_token
  elsif c == :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-attribute-value-no-quotes"}
    emit_current_token
  else
    stop_chars = ["&", ">", "<"] + SPACE_CHARACTERS
    @current_token[:data][-1][1] += c + @stream.chars_until(stop_chars)
  end
  true
end
|
||||
|
||||
# Bogus comment state: emits everything up to the next ">" (or EOF, which
# chars_until checks for) as a comment token. Always returns true.
def bogus_comment_state
  comment_text = @stream.chars_until((">"))
  @token_queue << {:type => :Comment, :data => comment_text}

  # Eat the character directly after the bogus comment which is either a
  # ">" or an :EOF.
  @stream.char
  @state = :data_state
  true
end
|
||||
|
||||
# Markup declaration open state: entered after "<!". Starts a comment on
# "--", a doctype on "DOCTYPE" (any letter case), and otherwise falls back
# to a bogus comment with all peeked characters put back.
# Always returns true.
def markup_declaration_open_state
  peeked = [@stream.char, @stream.char]

  if peeked == ["-", "-"]
    @current_token = {:type => :Comment, :data => ""}
    @state = :comment_start_state
    return true
  end

  # Read five more characters: seven in total, the length of "DOCTYPE".
  5.times { peeked << @stream.char }

  # Put in explicit :EOF check
  if !peeked.include?(:EOF) && peeked.join("").upcase == "DOCTYPE"
    @current_token = {:type => :Doctype, :name => "", :publicId => nil, :systemId => nil, :correct => true}
    @state = :doctype_state
  else
    @token_queue << {:type => :ParseError, :data => "expected-dashes-or-doctype"}
    @stream.unget(peeked)
    @state = :bogus_comment_state
  end
  true
end
|
||||
|
||||
# Comment start state: entered directly after "<!--". Always returns true.
def comment_start_state
  c = @stream.char
  if c == "-"
    @state = :comment_start_dash_state
  elsif c == ">"
    # "<!-->": report it and emit the (empty) comment.
    @token_queue << {:type => :ParseError, :data => "incorrect-comment"}
    @token_queue << @current_token
    @state = :data_state
  elsif c == :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-comment"}
    @token_queue << @current_token
    @state = :data_state
  else
    @current_token[:data] += c + @stream.chars_until("-")
    @state = :comment_state
  end
  true
end
|
||||
|
||||
# Comment start dash state: entered after "<!--" followed by one "-".
# Always returns true.
def comment_start_dash_state
  c = @stream.char
  if c == "-"
    @state = :comment_end_state
  elsif c == ">"
    @token_queue << {:type => :ParseError, :data => "incorrect-comment"}
    @token_queue << @current_token
    @state = :data_state
  elsif c == :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-comment"}
    @token_queue << @current_token
    @state = :data_state
  else
    # The dash seen earlier was ordinary comment text after all.
    @current_token[:data] += '-' + c + @stream.chars_until("-")
    @state = :comment_state
  end
  true
end
|
||||
|
||||
# Comment state: accumulates comment text up to the next "-".
# Always returns true.
def comment_state
  c = @stream.char
  if c == "-"
    @state = :comment_end_dash_state
  elsif c == :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-comment"}
    @token_queue << @current_token
    @state = :data_state
  else
    @current_token[:data] += c + @stream.chars_until("-")
  end
  true
end
|
||||
|
||||
# Comment end dash state: one "-" has been seen inside a comment.
# Always returns true.
def comment_end_dash_state
  c = @stream.char
  if c == "-"
    @state = :comment_end_state
  elsif c == :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-comment-end-dash"}
    @token_queue << @current_token
    @state = :data_state
  else
    # The dash was plain text; keep it and continue to the next dash.
    @current_token[:data] += "-" + c + @stream.chars_until("-")
    # Consume the next character which is either a "-" or an :EOF as
    # well so if there's a "-" directly after the "-" we go nicely to
    # the "comment end state" without emitting a ParseError there.
    @stream.char
  end
  true
end
|
||||
|
||||
# Tokenizer state :comment_end_state. Reads one character:
#   ">"   -> emit current token, :data_state
#   "-"   -> "unexpected-dash-after-double-dash-in-comment" ParseError and
#            the dash is appended to the token data (state unchanged)
#   :EOF  -> "eof-in-comment-double-dash" ParseError, emit current token,
#            :data_state
#   other -> "unexpected-char-in-comment" ParseError, "--" plus the
#            character are appended to the token data, :comment_state
# Always returns true.
def comment_end_state
  char = @stream.char
  case char
  when ">"
    @token_queue << @current_token
    @state = :data_state
  when "-"
    @token_queue << {:type => :ParseError, :data => "unexpected-dash-after-double-dash-in-comment"}
    @current_token[:data] += char
  when :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-comment-double-dash"}
    @token_queue << @current_token
    @state = :data_state
  else
    # XXX
    @token_queue << {:type => :ParseError, :data => "unexpected-char-in-comment"}
    @current_token[:data] += "--" + char
    @state = :comment_state
  end
  true
end
|
||||
|
||||
# Tokenizer state :doctype_state. Reads one character; unless it is one of
# SPACE_CHARACTERS a "need-space-after-doctype" ParseError is queued and the
# character is handed back to the stream. Either way the next state is
# :before_doctype_name_state. Always returns true.
def doctype_state
  char = @stream.char
  unless SPACE_CHARACTERS.include?(char)
    @token_queue << {:type => :ParseError, :data => "need-space-after-doctype"}
    @stream.unget(char)
  end
  @state = :before_doctype_name_state
  true
end
|
||||
|
||||
# Tokenizer state :before_doctype_name_state. Reads one character:
#   space -> ignored
#   ">"   -> "expected-doctype-name-but-got-right-bracket" ParseError, token
#            flagged :correct => false and emitted, :data_state
#   :EOF  -> "expected-doctype-name-but-got-eof" ParseError, token flagged
#            :correct => false and emitted, :data_state
#   other -> becomes the first character of the token name,
#            :doctype_name_state
# Always returns true.
def before_doctype_name_state
  char = @stream.char
  return true if SPACE_CHARACTERS.include?(char)

  case char
  when ">"
    @token_queue << {:type => :ParseError, :data => "expected-doctype-name-but-got-right-bracket"}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => "expected-doctype-name-but-got-eof"}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    @current_token[:name] = char
    @state = :doctype_name_state
  end
  true
end
|
||||
|
||||
# Tokenizer state :doctype_name_state. Reads one character:
#   space -> :after_doctype_name_state
#   ">"   -> emit current token, :data_state
#   :EOF  -> "eof-in-doctype-name" ParseError, token flagged
#            :correct => false and emitted, :data_state
#   other -> appended to the token name
# Always returns true.
def doctype_name_state
  char = @stream.char
  if SPACE_CHARACTERS.include?(char)
    @state = :after_doctype_name_state
    return true
  end

  case char
  when ">"
    @token_queue << @current_token
    @state = :data_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-doctype-name"}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    @current_token[:name] += char
  end
  true
end
|
||||
|
||||
# Tokenizer state :after_doctype_name_state. Reads one character:
#   space -> ignored
#   ">"   -> emit current token, :data_state
#   :EOF  -> token flagged :correct => false, :EOF handed back to the
#            stream, "eof-in-doctype" ParseError, token emitted, :data_state
#   other -> the character plus the next five are read and compared
#            case-insensitively with "public" / "system"; a match selects
#            the corresponding before-identifier state. Otherwise all six
#            characters are handed back, an
#            "expected-space-or-right-bracket-in-doctype" ParseError is
#            queued and the tokenizer enters :bogus_doctype_state.
# Always returns true.
def after_doctype_name_state
  data = @stream.char
  return true if SPACE_CHARACTERS.include?(data)

  case data
  when ">"
    @token_queue << @current_token
    @state = :data_state
  when :EOF
    @current_token[:correct] = false
    @stream.unget(data)
    @token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
    @token_queue << @current_token
    @state = :data_state
  else
    char_stack = [data]
    # Read through @stream like every other state does; a bare `stream`
    # call only works if a reader of that name happens to exist.
    5.times { char_stack << @stream.char }
    token = char_stack.join('').tr(ASCII_UPPERCASE, ASCII_LOWERCASE)
    if token == "public" and !char_stack.include?(:EOF)
      @state = :before_doctype_public_identifier_state
    elsif token == "system" and !char_stack.include?(:EOF)
      @state = :before_doctype_system_identifier_state
    else
      @stream.unget(char_stack)
      @token_queue << {:type => :ParseError, :data => "expected-space-or-right-bracket-in-doctype", "datavars" => {"data" => data}}
      @state = :bogus_doctype_state
    end
  end
  true
end
|
||||
|
||||
# Tokenizer state :before_doctype_public_identifier_state. Reads one
# character:
#   space -> ignored
#   '"'   -> start an empty :publicId, double-quoted identifier state
#   "'"   -> start an empty :publicId, single-quoted identifier state
#   ">"   -> "unexpected-end-of-doctype" ParseError, token flagged
#            :correct => false and emitted, :data_state
#   :EOF  -> "eof-in-doctype" ParseError, token flagged :correct => false
#            and emitted, :data_state
#   other -> "unexpected-char-in-doctype" ParseError, :bogus_doctype_state
# Always returns true.
def before_doctype_public_identifier_state
  char = @stream.char
  return true if SPACE_CHARACTERS.include?(char)

  case char
  when "\""
    @current_token[:publicId] = ""
    @state = :doctype_public_identifier_double_quoted_state
  when "'"
    @current_token[:publicId] = ""
    @state = :doctype_public_identifier_single_quoted_state
  when ">"
    @token_queue << {:type => :ParseError, :data => "unexpected-end-of-doctype"}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    @token_queue << {:type => :ParseError, :data => "unexpected-char-in-doctype"}
    @state = :bogus_doctype_state
  end
  true
end
|
||||
|
||||
# Tokenizer state for a double-quoted DOCTYPE public identifier. Reads one
# character:
#   '"'   -> :after_doctype_public_identifier_state
#   :EOF  -> "eof-in-doctype" ParseError, token flagged :correct => false
#            and emitted, :data_state
#   other -> appended to the token's :publicId
# Always returns true.
def doctype_public_identifier_double_quoted_state
  char = @stream.char
  case char
  when "\""
    @state = :after_doctype_public_identifier_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    @current_token[:publicId] += char
  end
  true
end
|
||||
|
||||
# Tokenizer state for a single-quoted DOCTYPE public identifier. Reads one
# character:
#   "'"   -> :after_doctype_public_identifier_state
#   :EOF  -> "eof-in-doctype" ParseError, token flagged :correct => false
#            and emitted, :data_state
#   other -> appended to the token's :publicId
# Always returns true.
def doctype_public_identifier_single_quoted_state
  char = @stream.char
  case char
  when "'"
    @state = :after_doctype_public_identifier_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    @current_token[:publicId] += char
  end
  true
end
|
||||
|
||||
# Tokenizer state :after_doctype_public_identifier_state. Reads one
# character:
#   space -> ignored
#   '"'   -> start an empty :systemId, double-quoted system identifier state
#   "'"   -> start an empty :systemId, single-quoted system identifier state
#   ">"   -> emit current token, :data_state
#   :EOF  -> "eof-in-doctype" ParseError, token flagged :correct => false
#            and emitted, :data_state
#   other -> "unexpected-char-in-doctype" ParseError, :bogus_doctype_state
# Always returns true.
def after_doctype_public_identifier_state
  data = @stream.char
  return true if SPACE_CHARACTERS.include?(data)

  case data
  when "\""
    @current_token[:systemId] = ""
    @state = :doctype_system_identifier_double_quoted_state
  when "'"
    @current_token[:systemId] = ""
    @state = :doctype_system_identifier_single_quoted_state
  when ">"
    @token_queue << @current_token
    @state = :data_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    # A stray character is not end-of-input: report it with the same code
    # the before-identifier states use, not the EOF code.
    @token_queue << {:type => :ParseError, :data => "unexpected-char-in-doctype"}
    @state = :bogus_doctype_state
  end
  true
end
|
||||
|
||||
# Tokenizer state :before_doctype_system_identifier_state. Reads one
# character:
#   space -> ignored
#   '"'   -> start an empty :systemId, double-quoted identifier state
#   "'"   -> start an empty :systemId, single-quoted identifier state
#   ">"   -> "unexpected-char-in-doctype" ParseError, token flagged
#            :correct => false and emitted, :data_state
#   :EOF  -> "eof-in-doctype" ParseError, token flagged :correct => false
#            and emitted, :data_state
#   other -> "unexpected-char-in-doctype" ParseError, :bogus_doctype_state
# Always returns true.
def before_doctype_system_identifier_state
  char = @stream.char
  return true if SPACE_CHARACTERS.include?(char)

  case char
  when "\""
    @current_token[:systemId] = ""
    @state = :doctype_system_identifier_double_quoted_state
  when "'"
    @current_token[:systemId] = ""
    @state = :doctype_system_identifier_single_quoted_state
  when ">"
    @token_queue << {:type => :ParseError, :data => "unexpected-char-in-doctype"}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    @token_queue << {:type => :ParseError, :data => "unexpected-char-in-doctype"}
    @state = :bogus_doctype_state
  end
  true
end
|
||||
|
||||
# Tokenizer state for a double-quoted DOCTYPE system identifier. Reads one
# character:
#   '"'   -> :after_doctype_system_identifier_state
#   :EOF  -> "eof-in-doctype" ParseError, token flagged :correct => false
#            and emitted, :data_state
#   other -> appended to the token's :systemId
# Always returns true.
def doctype_system_identifier_double_quoted_state
  char = @stream.char
  case char
  when "\""
    @state = :after_doctype_system_identifier_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    @current_token[:systemId] += char
  end
  true
end
|
||||
|
||||
# Tokenizer state for a single-quoted DOCTYPE system identifier. Reads one
# character:
#   "'"   -> :after_doctype_system_identifier_state
#   :EOF  -> "eof-in-doctype" ParseError, token flagged :correct => false
#            and emitted, :data_state
#   other -> appended to the token's :systemId
# Always returns true.
def doctype_system_identifier_single_quoted_state
  char = @stream.char
  case char
  when "'"
    @state = :after_doctype_system_identifier_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    @current_token[:systemId] += char
  end
  true
end
|
||||
|
||||
# Tokenizer state :after_doctype_system_identifier_state. Reads one
# character:
#   space -> ignored
#   ">"   -> emit current token, :data_state
#   :EOF  -> "eof-in-doctype" ParseError, token flagged :correct => false
#            and emitted, :data_state
#   other -> "unexpected-char-in-doctype" ParseError, :bogus_doctype_state
# Always returns true.
def after_doctype_system_identifier_state
  data = @stream.char
  return true if SPACE_CHARACTERS.include?(data)

  case data
  when ">"
    @token_queue << @current_token
    @state = :data_state
  when :EOF
    @token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
    @current_token[:correct] = false
    @token_queue << @current_token
    @state = :data_state
  else
    # A stray character is not end-of-input: report it with the same code
    # the before-identifier states use, not the EOF code.
    @token_queue << {:type => :ParseError, :data => "unexpected-char-in-doctype"}
    @state = :bogus_doctype_state
  end
  true
end
|
||||
|
||||
# Tokenizer state :bogus_doctype_state. Reads one character and marks the
# current token :correct => false unconditionally:
#   ">"   -> emit current token, :data_state
#   :EOF  -> :EOF handed back to the stream, "eof-in-doctype" ParseError,
#            token emitted, :data_state
#   other -> dropped; the state is left unchanged
# Always returns true.
def bogus_doctype_state
  char = @stream.char
  @current_token[:correct] = false
  case char
  when ">"
    @token_queue << @current_token
    @state = :data_state
  when :EOF
    # XXX EMIT
    @stream.unget(char)
    @token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
    @token_queue << @current_token
    @state = :data_state
  end
  true
end
|
||||
|
||||
end
|
||||
|
||||
end
|
24
attic/vendor/plugins/HTML5lib/lib/html5/treebuilders.rb
vendored
Normal file
24
attic/vendor/plugins/HTML5lib/lib/html5/treebuilders.rb
vendored
Normal file
|
@ -0,0 +1,24 @@
|
|||
module HTML5
  module TreeBuilders

    class << self
      # Looks up a tree builder class by name (case-insensitive; anything
      # responding to #to_s is accepted). The implementation file is only
      # required when its builder is first requested. Raises a RuntimeError
      # for a name that is not one of 'simpletree', 'rexml' or 'hpricot'.
      def [](name)
        wanted = name.to_s.downcase
        if wanted == 'simpletree'
          require 'html5/treebuilders/simpletree'
          SimpleTree::TreeBuilder
        elsif wanted == 'rexml'
          require 'html5/treebuilders/rexml'
          REXML::TreeBuilder
        elsif wanted == 'hpricot'
          require 'html5/treebuilders/hpricot'
          Hpricot::TreeBuilder
        else
          raise "Unknown TreeBuilder #{name}"
        end
      end

      alias :get_tree_builder :[]
    end
  end
end
|
334
attic/vendor/plugins/HTML5lib/lib/html5/treebuilders/base.rb
vendored
Executable file
334
attic/vendor/plugins/HTML5lib/lib/html5/treebuilders/base.rb
vendored
Executable file
|
@ -0,0 +1,334 @@
|
|||
require 'html5/constants'
|
||||
|
||||
#XXX - TODO; make the default interface more ElementTree-like rather than DOM-like
|
||||
|
||||
module HTML5
|
||||
|
||||
# The scope markers are inserted when entering buttons, object elements,
|
||||
# marquees, table cells, and table captions, and are used to prevent formatting
|
||||
# from "leaking" into tables, buttons, object elements, and marquees.
|
||||
Marker = nil
|
||||
|
||||
module TreeBuilders
|
||||
module Base
|
||||
|
||||
# Abstract node interface shared by the tree builders. Only the bookkeeping
# (parent, childNodes, _flags) and #reparentChildren are implemented here;
# every other operation raises NotImplementedError.
class Node
  # Parent node, or nil (the initial value).
  attr_accessor :parent

  # Array of child nodes. It must hold all element children; other node
  # types are optional.
  attr_accessor :childNodes

  # Array of miscellaneous flags set on the node.
  attr_accessor :_flags

  # +name+ is accepted for interface compatibility but not stored here.
  def initialize(name)
    @parent = nil
    @childNodes = []
    @_flags = []
  end

  # Add +node+ as the last child of this node.
  def appendChild(node)
    raise NotImplementedError
  end

  # Add +data+ as text, either before the child +insertBefore+ or, when
  # that is nil, at the end of this node's text.
  def insertText(data, insertBefore=nil)
    raise NotImplementedError
  end

  # Add +node+ as a child, placed immediately before the existing child
  # +refNode+.
  def insertBefore(node, refNode)
    raise NotImplementedError
  end

  # Detach +node+ from this node's children.
  def removeChild(node)
    raise NotImplementedError
  end

  # Hands every child to newParent.appendChild, in order, and then empties
  # this node's child list. Going through appendChild lets trees that do
  # not store text as nodes move their text correctly.
  def reparentChildren(newParent)
    #XXX - should this method be made more general?
    @childNodes.each do |child|
      newParent.appendChild(child)
    end
    @childNodes = []
  end

  # Shallow copy: same name and attributes, no parent and no children.
  def cloneNode
    raise NotImplementedError
  end

  # True when the node has children or text.
  def hasContent
    raise NotImplementedError
  end
end
|
||||
|
||||
# Base treebuilder implementation
|
||||
# Base treebuilder implementation.
#
# The methods below read @documentClass, @doctypeClass, @elementClass,
# @commentClass and @fragmentClass (the classes used for the document root,
# doctypes, HTML elements, comments and fragments). This class never assigns
# them, so a concrete builder must set them before #reset runs.
class TreeBuilder

  attr_accessor :open_elements

  attr_accessor :activeFormattingElements

  attr_accessor :document

  attr_accessor :head_pointer

  attr_accessor :formPointer

  def initialize
    reset
  end

  # Clears all per-parse state and creates a fresh document node.
  def reset
    @open_elements = []
    @activeFormattingElements = []

    #XXX - rename these to headElement, formElement
    @head_pointer = nil
    @formPointer = nil

    self.insert_from_table = false

    @document = @documentClass.new
  end

  # Walks the stack of open elements from the most recent one and returns
  # true if an element named +target+ is met before a scope boundary:
  # 'table', 'html', or (unless +tableVariant+) any of SCOPING_ELEMENTS.
  # Raises RuntimeError if the stack contains none of these.
  def elementInScope(target, tableVariant=false)
    # Exit early when possible.
    return true if @open_elements[-1].name == target

    @open_elements.reverse_each do |element|
      if element.name == target
        return true
      elsif element.name == 'table'
        return false
      elsif not tableVariant and SCOPING_ELEMENTS.include?(element.name)
        return false
      elsif element.name == 'html'
        return false
      end
    end
    # We should never reach this point. Fail with a descriptive error
    # rather than by calling an assert helper this class does not define.
    raise "elementInScope: no 'html' element on the stack of open elements"
  end

  # Re-opens the formatting elements that follow the last Marker (or the
  # last entry still on the stack of open elements) in the active
  # formatting list, replacing each list entry with the new element.
  def reconstructActiveFormattingElements
    # Within this algorithm the order of steps described in the
    # specification is not quite the same as the order of steps in the
    # code. It should still do the same though.

    # Step 1: stop the algorithm when there's nothing to do.
    return if @activeFormattingElements.empty?

    # Step 2 and step 3: we start with the last element. So i is -1.
    i = -1
    entry = @activeFormattingElements[i]
    return if entry == Marker or @open_elements.include?(entry)

    # Step 6
    until entry == Marker or @open_elements.include?(entry)
      # Step 5: let entry be one earlier in the list.
      i -= 1
      # Step 4: ran off the front of the list. An out-of-range index
      # returns nil instead of raising, so test the bound explicitly.
      break if i < -@activeFormattingElements.length
      entry = @activeFormattingElements[i]
    end

    loop do
      # Step 7
      i += 1

      # Step 8
      clone = @activeFormattingElements[i].cloneNode

      # Step 9
      element = insert_element(clone.name, clone.attributes)

      # Step 10
      @activeFormattingElements[i] = element

      # Step 11
      break if element == @activeFormattingElements[-1]
    end
  end

  # Pops active formatting entries up to and including the last Marker, or
  # until the list is empty.
  def clearActiveFormattingElements
    until @activeFormattingElements.empty?
      break if @activeFormattingElements.pop == Marker
    end
  end

  # Check if an element exists between the end of the active
  # formatting elements and the last marker. If it does, return it, else
  # return false
  def elementInActiveFormattingElements(name)
    @activeFormattingElements.reverse_each do |element|
      # Check for Marker first because if it's a Marker it doesn't have a
      # name attribute.
      break if element == Marker
      return element if element.name == name
    end
    false
  end

  # Creates a doctype node and appends it to the document.
  def insertDoctype(name, public_id, system_id)
    doctype = @doctypeClass.new(name)
    doctype.public_id = public_id
    doctype.system_id = system_id
    @document.appendChild(doctype)
  end

  # Appends a comment node to +parent+, defaulting to the current node.
  def insert_comment(data, parent=nil)
    parent ||= @open_elements[-1]
    parent.appendChild(@commentClass.new(data))
  end

  # Create an element but don't insert it anywhere
  def createElement(name, attributes)
    element = @elementClass.new(name)
    element.attributes = attributes
    element
  end

  # Switch the function used to insert an element from the
  # normal one to the misnested table one and back again
  def insert_from_table=(value)
    @insert_from_table = value
    @insert_element = value ? :insert_elementTable : :insert_elementNormal
  end

  # Dispatches to the insertion strategy chosen by #insert_from_table=.
  def insert_element(name, attributes)
    send(@insert_element, name, attributes)
  end

  # Creates an element, appends it to the current node and pushes it onto
  # the stack of open elements. Returns the element.
  def insert_elementNormal(name, attributes)
    element = createElement(name, attributes)
    @open_elements.last.appendChild(element)
    @open_elements.push(element)
    element
  end

  # Create an element and insert it into the tree. When the current node is
  # one of TABLE_INSERT_MODE_ELEMENTS the element is placed at the
  # misnested-table position; otherwise this is a normal insertion.
  def insert_elementTable(name, attributes)
    # Decide first, so the normal path builds exactly one element.
    unless TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements.last.name)
      return insert_elementNormal(name, attributes)
    end

    element = createElement(name, attributes)
    #We should be in the InTable mode. This means we want to do
    #special magic element rearranging
    parent, insertBefore = getTableMisnestedNodePosition
    if insertBefore.nil?
      parent.appendChild(element)
    else
      parent.insertBefore(element, insertBefore)
    end
    @open_elements.push(element)
    element
  end

  # Inserts text into +parent+ (default: the current node), or at the
  # misnested-table position when table insertion is active and the current
  # node is one of TABLE_INSERT_MODE_ELEMENTS.
  def insertText(data, parent=nil)
    parent ||= @open_elements[-1]

    if @insert_from_table && TABLE_INSERT_MODE_ELEMENTS.include?(@open_elements[-1].name)
      #We should be in the InTable mode. This means we want to do
      #special magic element rearranging
      parent, insertBefore = getTableMisnestedNodePosition
      parent.insertText(data, insertBefore)
    else
      parent.insertText(data)
    end
  end

  # Get the foster parent element, and sibling to insert before
  # (or nil) when inserting a misnested table node
  def getTableMisnestedNodePosition
    #The foster parent element is the one which comes before the most
    #recently opened table element
    #XXX - this is really inelegant
    lastTable = nil
    fosterParent = nil
    insertBefore = nil
    @open_elements.reverse_each do |element|
      if element.name == "table"
        lastTable = element
        break
      end
    end
    if lastTable
      #XXX - we should really check that this parent is actually a
      #node here
      if lastTable.parent
        fosterParent = lastTable.parent
        insertBefore = lastTable
      else
        # lastTable was found searching from the end, so locate it from
        # the end as well.
        fosterParent = @open_elements[@open_elements.rindex(lastTable) - 1]
      end
    else
      fosterParent = @open_elements[0]
    end
    return fosterParent, insertBefore
  end

  # Pops dd/dt/li/p/td/th/tr elements off the stack of open elements until
  # the current node is something else or is named +exclude+.
  def generateImpliedEndTags(exclude=nil)
    # XXX td, th and tr are not actually needed
    # XXX This is not entirely what the specification says. We should
    # investigate it more closely.
    loop do
      name = @open_elements[-1].name
      break unless %w[dd dt li p td th tr].include?(name) && name != exclude
      @open_elements.pop
    end
  end

  def get_document
    @document
  end

  # Moves the children of the root open element into a new fragment node
  # and returns the fragment.
  def get_fragment
    #assert @inner_html
    fragment = @fragmentClass.new
    @open_elements[0].reparentChildren(fragment)
    fragment
  end

  # Serialize the subtree of node in the format required by unit tests
  # node - the node from which to start serializing
  def testSerializer(node)
    raise NotImplementedError
  end

end
|
||||
end
|
||||
end
|
||||
end
|
231
attic/vendor/plugins/HTML5lib/lib/html5/treebuilders/hpricot.rb
vendored
Normal file
231
attic/vendor/plugins/HTML5lib/lib/html5/treebuilders/hpricot.rb
vendored
Normal file
|
@ -0,0 +1,231 @@
|
|||
require 'html5/treebuilders/base'
|
||||
require 'rubygems'
|
||||
require 'hpricot'
|
||||
require 'forwardable'
|
||||
|
||||
module HTML5
|
||||
module TreeBuilders
|
||||
module Hpricot
|
||||
|
||||
# Common behaviour for the Hpricot-backed nodes. Each instance wraps an
# Hpricot object (reachable through #hpricot) and applies structural changes
# both to its own childNodes list and to the wrapped object.
class Node < Base::Node
  extend Forwardable

  def_delegators :@hpricot, :name

  attr_accessor :hpricot

  # Builds the wrapped object by instantiating the subclass's
  # hpricot_class with +name+.
  def initialize(name)
    super(name)
    @hpricot = self.class.hpricot_class.new name
  end

  # Appends +node+. A text node arriving right after another text node is
  # folded into that node's content rather than stored separately. The
  # node's wrapped object is removed from any previous Hpricot parent and
  # both parent links are pointed at this node.
  def appendChild(node)
    if node.kind_of?(TextNode) && childNodes.any? && childNodes.last.kind_of?(TextNode)
      tail = childNodes.last.hpricot
      tail.content = tail.content + node.hpricot.content
    else
      childNodes << node
      hpricot.children << node.hpricot
    end

    previous_owner = node.hpricot.parent
    if previous_owner != nil
      siblings = previous_owner.children
      siblings.delete_at(siblings.index(node.hpricot))
    end
    node.hpricot.parent = hpricot
    node.parent = self
  end

  # Removes +node+ from both child lists and clears both parent links.
  def removeChild(node)
    childNodes.delete(node)
    wrapped_children = hpricot.children
    wrapped_children.delete_at(wrapped_children.index(node.hpricot))
    node.hpricot.parent = nil
    node.parent = nil
  end

  # Wraps +data+ in a TextNode and inserts it before +before+, or appends
  # it when +before+ is nil/false.
  def insertText(data, before=nil)
    text = TextNode.new(data)
    before ? insertBefore(text, before) : appendChild(text)
  end

  # Inserts +node+ in front of the child +refNode+. A text node landing
  # directly after an existing text node is merged into it instead.
  def insertBefore(node, refNode)
    position = childNodes.index(refNode)
    if node.kind_of?(TextNode) && position > 0 && childNodes[position - 1].kind_of?(TextNode)
      neighbour = childNodes[position - 1].hpricot
      neighbour.content = neighbour.to_s + node.hpricot.to_s
    else
      refNode.hpricot.parent.insert_before(node.hpricot,refNode.hpricot)
      childNodes.insert(position, node)
    end
  end

  # True when childNodes has at least one truthy entry.
  def hasContent
    childNodes.any?
  end
end
|
||||
|
||||
# Element node backed by an Hpricot::Elem.
class Element < Node
  def self.hpricot_class
    ::Hpricot::Elem
  end

  # After the generic Node setup, replaces the wrapped object with an Elem
  # built around a start tag named +name+.
  def initialize(name)
    super(name)

    @hpricot = ::Hpricot::Elem.new(::Hpricot::STag.new(name))
  end

  # Tag name, read from the wrapped element's start tag.
  def name
    @hpricot.stag.name
  end

  # Returns a new node of the same class and name carrying a copy of this
  # element's attributes.
  def cloneNode
    copy = self.class.new(name)
    attributes.each { |key, val| copy.hpricot[key] = val }
    copy
  end

  # A call to Hpricot::Elem#raw_attributes is built dynamically,
  # so alterations to the returned value (a hash) will be lost.
  #
  # AttributeProxy works around this by forwarding :[]= calls
  # to the raw_attributes accessor on the element start tag.
  #
  class AttributeProxy
    def initialize(hpricot)
      @hpricot = hpricot
    end

    # Writes straight into the start tag's attribute store.
    def []=(k, v)
      @hpricot.stag.send(stag_attributes_method)[k] = v
    end

    def stag_attributes_method
      # STag#attributes changed to STag#raw_attributes after Hpricot 0.5
      if @hpricot.stag.respond_to?(:raw_attributes)
        :raw_attributes
      else
        :attributes
      end
    end

    # Everything else is answered by the element's attributes object.
    def method_missing(*a, &b)
      @hpricot.attributes.send(*a, &b)
    end
  end

  # Returns a fresh AttributeProxy around the wrapped element.
  def attributes
    AttributeProxy.new(@hpricot)
  end

  # Copies each name/value pair of +attrs+ onto the wrapped element.
  def attributes=(attrs)
    attrs.each { |key, val| @hpricot[key] = val }
  end

  # Test-serializer output for this element: its tag line, one line per
  # attribute (skipping 'xmlns') and then each child's output, indented.
  def printTree(indent=0)
    out = "\n|#{' ' * indent}<#{name}>"
    indent += 2
    attributes.each do |key, val|
      next if key == 'xmlns'
      out += "\n|#{' ' * indent}#{key}=\"#{val}\""
    end
    childNodes.inject(out) { |acc, child| acc + child.printTree(indent) }
  end
end
|
||||
|
||||
# Document root backed by an Hpricot::Doc.
class Document < Node
  def self.hpricot_class
    ::Hpricot::Doc
  end

  # The document has no name, so nil is passed up to Node.
  def initialize
    super(nil)
  end

  # Test-serializer output: '#document' followed by each child's output.
  def printTree(indent=0)
    childNodes.inject('#document') do |out, child|
      out + child.printTree(indent + 2)
    end
  end
end
|
||||
|
||||
# Doctype node backed by an Hpricot::DocType.
class DocumentType < Node
  def_delegators :@hpricot, :public_id, :system_id

  def self.hpricot_class
    ::Hpricot::DocType
  end

  # Builds the wrapped DocType from all three parts. The generic Node
  # constructor instantiates hpricot_class with the name only; an
  # ArgumentError from that attempt is ignored on purpose because the
  # wrapped object is created explicitly right after.
  def initialize(name, public_id, system_id)
    begin
      super(name)
    rescue ArgumentError # needs 3...
    end

    @hpricot = ::Hpricot::DocType.new(name, public_id, system_id)
  end

  # Test-serializer output for the doctype; the name is omitted when the
  # wrapped object's target is nil or empty.
  def printTree(indent=0)
    target = hpricot.target
    # String#any? is gone on Ruby 1.9 (String is no longer Enumerable), so
    # test for emptiness directly; this works on 1.8 as well.
    if target and !target.empty?
      "\n|#{' ' * indent}<!DOCTYPE #{target}>"
    else
      "\n|#{' ' * indent}<!DOCTYPE >"
    end
  end
end
|
||||
|
||||
# Fragment container: an Element created with an empty name.
class DocumentFragment < Element
  def initialize
    super('')
  end

  # Test-serializer output: just the children's output, concatenated.
  def printTree(indent=0)
    childNodes.inject('') do |out, child|
      out + child.printTree(indent + 2)
    end
  end
end
|
||||
|
||||
# Text node backed by an Hpricot::Text.
class TextNode < Node
  # Only the wrapped text object is created; the Node constructor is not
  # invoked.
  def initialize(data)
    @hpricot = ::Hpricot::Text.new(data)
  end

  # Test-serializer output: the text content in double quotes.
  def printTree(indent=0)
    padding = ' ' * indent
    "\n|#{padding}\"#{hpricot.content}\""
  end
end
|
||||
|
||||
# Comment node backed by an Hpricot::Comment.
class CommentNode < Node
  def self.hpricot_class
    ::Hpricot::Comment
  end

  # Test-serializer output: the comment content between <!-- and -->.
  def printTree(indent=0)
    padding = ' ' * indent
    "\n|#{padding}<!-- #{hpricot.content} -->"
  end
end
|
||||
|
||||
# Tree builder producing Hpricot objects.
class TreeBuilder < Base::TreeBuilder
  # Registers the node classes used by the base builder. The base
  # constructor is not invoked here.
  def initialize
    @documentClass = Document
    @doctypeClass  = DocumentType
    @elementClass  = Element
    @commentClass  = CommentNode
    @fragmentClass = DocumentFragment
  end

  # Builds the doctype from all three parts in one call and appends it to
  # the document.
  def insertDoctype(name, public_id, system_id)
    @document.appendChild(@doctypeClass.new(name, public_id, system_id))
  end

  def testSerializer(node)
    node.printTree
  end

  # Returns the wrapped Hpricot document rather than the wrapper node.
  def get_document
    @document.hpricot
  end

  # Stores the fragment built by the base class as the current document
  # and returns the wrapped object's children.
  def get_fragment
    @document = super
    @document.hpricot.children
  end
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
209
attic/vendor/plugins/HTML5lib/lib/html5/treebuilders/rexml.rb
vendored
Normal file
209
attic/vendor/plugins/HTML5lib/lib/html5/treebuilders/rexml.rb
vendored
Normal file
|
@ -0,0 +1,209 @@
|
|||
require 'html5/treebuilders/base'
|
||||
require 'rexml/document'
|
||||
require 'forwardable'
|
||||
|
||||
module HTML5
|
||||
module TreeBuilders
|
||||
module REXML
|
||||
|
||||
# Common behaviour for the REXML-backed nodes. Each instance wraps a REXML
# object (reachable through #rxobj) and applies structural changes both to
# its own childNodes list and to the wrapped object.
class Node < Base::Node
  extend Forwardable
  def_delegators :@rxobj, :name, :attributes
  attr_accessor :rxobj

  # Builds the wrapped object by instantiating the subclass's rxclass
  # with +name+.
  def initialize name
    super name
    @rxobj = self.class.rxclass.new name
  end

  # Appends +node+. A text node arriving right after another text node is
  # folded into that node's value (which is then flagged raw) rather than
  # stored separately.
  def appendChild node
    if node.kind_of?(TextNode) && !childNodes.empty? && childNodes.last.kind_of?(TextNode)
      tail = childNodes.last.rxobj
      tail.value = tail.to_s + node.rxobj.to_s
      tail.raw = true
    else
      childNodes.push node
      rxobj.add node.rxobj
    end
    node.parent = self
  end

  # Removes +node+ from both child lists and clears its parent link.
  def removeChild node
    childNodes.delete node
    rxobj.delete node.rxobj
    node.parent = nil
  end

  # Wraps +data+ in a TextNode and inserts it before +before+, or appends
  # it when +before+ is nil/false.
  def insertText data, before=nil
    text = TextNode.new(data)
    before ? insertBefore(text, before) : appendChild(text)
  end

  # Inserts +node+ in front of the child +refNode+. A text node landing
  # directly after an existing text node is merged into it instead.
  def insertBefore node, refNode
    position = childNodes.index(refNode)
    if node.kind_of?(TextNode) && position > 0 && childNodes[position - 1].kind_of?(TextNode)
      neighbour = childNodes[position - 1].rxobj
      neighbour.value = neighbour.to_s + node.rxobj.to_s
      neighbour.raw = true
    else
      childNodes.insert position, node
      refNode.rxobj.parent.insert_before(refNode.rxobj,node.rxobj)
    end
  end

  # True when the node has at least one child.
  def hasContent
    !childNodes.empty?
  end
end
|
||||
|
||||
# Element node backed by a REXML::Element.
class Element < Node
  def self.rxclass
    ::REXML::Element
  end

  def initialize name
    super name
  end

  # Returns a new node of the same class and name carrying a copy of this
  # element's attributes.
  def cloneNode
    copy = self.class.new name
    attributes.each { |key, val| copy.attributes[key] = val }
    copy
  end

  # Copies each name/value pair of +value+ onto the wrapped element.
  def attributes= value
    value.each { |key, val| rxobj.attributes[key] = val }
  end

  # Test-serializer output for this element: its tag line, one line per
  # attribute (skipping 'xmlns') and then each child's output, indented.
  def printTree indent=0
    out = "\n|#{' ' * indent}<#{name}>"
    indent += 2
    attributes.each do |key, val|
      next if key == 'xmlns'
      out += "\n|#{' ' * indent}#{key}=\"#{val}\""
    end
    childNodes.each { |child| out += child.printTree(indent) }
    out
  end
end
|
||||
|
||||
# Document root backed by a REXML::Document.
class Document < Node
  def self.rxclass
    ::REXML::Document
  end

  # The document has no name, so nil is passed up to Node.
  def initialize
    super nil
  end

  # ryansking: not sure why this was here. removing it doesn't cause any tests to fail
  # def appendChild node
  #   if node.kind_of? Element and node.name == 'html'
  #     node.rxobj.add_namespace('http://www.w3.org/1999/xhtml')
  #   end
  #   super node
  # end

  # Test-serializer output: '#document' followed by each child's output.
  def printTree indent=0
    childNodes.inject("#document") do |out, child|
      out + child.printTree(indent + 2)
    end
  end
end
|
||||
|
||||
# Doctype node backed by a REXML::DocType.
class DocumentType < Node
  def_delegator :@rxobj, :public, :public_id

  def_delegator :@rxobj, :system, :system_id

  def self.rxclass
    ::REXML::DocType
  end

  # Runs the generic Node setup, then replaces the wrapped object with a
  # DocType built from whichever identifiers were supplied: PUBLIC when
  # +public_id+ is set, SYSTEM when only +system_id+ is set, and a
  # name-only doctype otherwise.
  def initialize name, public_id, system_id
    super(name)
    @rxobj =
      if public_id
        ::REXML::DocType.new [name, ::REXML::DocType::PUBLIC, public_id, system_id]
      elsif system_id
        ::REXML::DocType.new [name, ::REXML::DocType::SYSTEM, nil, system_id]
      else
        ::REXML::DocType.new name
      end
  end

  # Test-serializer output for the doctype.
  def printTree indent=0
    padding = ' ' * indent
    "\n|#{padding}<!DOCTYPE #{name}>"
  end
end
|
||||
|
||||
# Fragment container: an Element created without a name.
class DocumentFragment < Element
  def initialize
    super nil
  end

  # Test-serializer output: just the children's output, concatenated.
  def printTree indent=0
    childNodes.inject("") do |out, child|
      out + child.printTree(indent+2)
    end
  end
end
|
||||
|
||||
# Text node backed by a REXML::Text.
class TextNode < Node
  # The text is handed to REXML in raw mode (last argument true), i.e. as
  # markup that is already escaped, so &, < and > are converted to entity
  # references first. "&" has to go first so the ampersands introduced by
  # the other two replacements are not escaped again. The Node constructor
  # is not invoked.
  def initialize data
    raw = data.gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
    @rxobj = ::REXML::Text.new(raw, true, nil, true)
  end

  # Test-serializer output: the text value in double quotes.
  def printTree indent=0
    "\n|#{' ' * indent}\"#{rxobj.value}\""
  end
end
|
||||
|
||||
# Comment node backed by a REXML::Comment.
class CommentNode < Node
  def self.rxclass
    ::REXML::Comment
  end

  # Test-serializer output: the comment string between <!-- and -->.
  def printTree indent=0
    padding = ' ' * indent
    "\n|#{padding}<!-- #{rxobj.string} -->"
  end
end
|
||||
|
||||
# Tree builder producing REXML objects.
class TreeBuilder < Base::TreeBuilder
  # Registers the node classes used by the base builder. The base
  # constructor is not invoked here.
  def initialize
    @documentClass = Document
    @doctypeClass  = DocumentType
    @elementClass  = Element
    @commentClass  = CommentNode
    @fragmentClass = DocumentFragment
  end

  # Builds the doctype from all three parts in one call and appends it to
  # the document.
  def insertDoctype(name, public_id, system_id)
    @document.appendChild(@doctypeClass.new(name, public_id, system_id))
  end

  def testSerializer node
    node.printTree
  end

  # Returns the wrapped REXML document rather than the wrapper node.
  def get_document
    @document.rxobj
  end

  # Stores the fragment built by the base class as the current document
  # and returns the wrapped object's children.
  def get_fragment
    @document = super
    @document.rxobj.children
  end
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
185
attic/vendor/plugins/HTML5lib/lib/html5/treebuilders/simpletree.rb
vendored
Normal file
185
attic/vendor/plugins/HTML5lib/lib/html5/treebuilders/simpletree.rb
vendored
Normal file
|
@ -0,0 +1,185 @@
|
|||
require 'html5/treebuilders/base'
|
||||
|
||||
module HTML5
|
||||
module TreeBuilders
|
||||
module SimpleTree
|
||||
|
||||
# Node representing an item in the tree.
class Node < Base::Node
  # The tag name associated with the node.
  attr_accessor :name

  # The value of the current node (applies to text nodes and comments).
  attr_accessor :value

  # A hash holding name => value pairs for the attributes of the node.
  attr_accessor :attributes

  def initialize(name)
    super
    @name = name
    @value = nil
    @attributes = {}
  end

  # Appends +node+ as the last child. A text node appended directly after
  # another text node is merged into it instead of being stored separately.
  def appendChild(node)
    if node.kind_of?(TextNode) &&
       childNodes.length > 0 && childNodes.last.kind_of?(TextNode)
      childNodes.last.value += node.value
    else
      childNodes << node
    end
    node.parent = self
  end

  # Removes +node+ from the children and clears its parent.
  def removeChild(node)
    childNodes.delete(node)
    node.parent = nil
  end

  # Returns a copy of this node (same class, name, attributes and value)
  # without any children.
  def cloneNode
    copy = self.class.new(name)
    attributes.each { |attr_name, attr_value| copy.attributes[attr_name] = attr_value }
    copy.value = value
    copy
  end

  # Inserts +data+ as text, before the child +before+ when given, otherwise
  # at the end of the children.
  def insertText(data, before = nil)
    if before
      insertBefore(TextNode.new(data), before)
    else
      appendChild(TextNode.new(data))
    end
  end

  # Inserts +node+ immediately before the child +refNode+. A text node
  # inserted directly after an existing text node is merged into it.
  #
  # Raises ArgumentError when +refNode+ is not a child of this node
  # (previously this surfaced as an error on the nil index).
  def insertBefore(node, refNode)
    index = childNodes.index(refNode)
    raise ArgumentError, "reference node is not a child of this node" if index.nil?

    if node.kind_of?(TextNode) && index > 0 && childNodes[index - 1].kind_of?(TextNode)
      childNodes[index - 1].value += node.value
    else
      childNodes.insert(index, node)
    end
    # Keep the parent link in step with appendChild, which also sets it.
    node.parent = self
  end

  # Serializes this node (via to_s) on its own line prefixed by +indent+
  # spaces, followed by every child two columns deeper.
  def printTree(indent = 0)
    header = "\n|%s%s" % [' ' * indent, to_s]
    childNodes.inject(header) do |tree, child|
      tree + child.printTree(indent + 2)
    end
  end

  # True when the node has at least one child.
  def hasContent
    childNodes.length > 0
  end
end
|
||||
|
||||
# Element node; serializes as "<name>".
class Element < Node
  def to_s
    "<#{name}>"
  end

  # Serializes the element line, then one line per attribute and then each
  # child, all two columns deeper than the element itself.
  def printTree(indent = 0)
    output = "\n|%s%s" % [' ' * indent, to_s]
    inner = indent + 2
    attributes.each do |attr_name, attr_value|
      output += "\n|%s%s=\"%s\"" % [' ' * inner, attr_name, attr_value]
    end
    childNodes.each do |child|
      output += child.printTree(inner)
    end
    output
  end
end
|
||||
|
||||
# Root node of a parsed document; created with a nil name.
class Document < Node
  def initialize
    super(nil)
  end

  def to_s
    "#document"
  end

  # Serializes the "#document" header (no leading newline) followed by
  # every child two columns deeper than +indent+.
  def printTree(indent = 0)
    childNodes.inject(to_s) do |tree, child|
      tree + child.printTree(indent + 2)
    end
  end
end
|
||||
|
||||
# Doctype node; public and system identifiers start out nil and are set
# through the accessors.
class DocumentType < Node
  attr_accessor :public_id, :system_id

  def initialize(name)
    super(name)
    @public_id = @system_id = nil
  end

  def to_s
    "<!DOCTYPE #{name}>"
  end
end
|
||||
|
||||
# Container for a parsed fragment: an Element created with a nil name.
class DocumentFragment < Element
  def initialize
    super(nil)
  end

  # Concatenates the printTree output of every child (two columns deeper
  # than +indent+); the fragment itself contributes no line.
  def printTree(indent = 0)
    childNodes.inject("") do |tree, child|
      tree + child.printTree(indent + 2)
    end
  end
end
|
||||
|
||||
# Text node: has a nil name and carries its text in +value+.
class TextNode < Node
  def initialize(value)
    super(nil)
    @value = value
  end

  # The text wrapped in double quotes.
  def to_s
    '"%s"' % value
  end
end
|
||||
|
||||
# Comment node: has a nil name and carries the comment text in +value+.
class CommentNode < Node
  def initialize(value)
    super(nil)
    @value = value
  end

  # The comment text between "<!-- " and " -->".
  def to_s
    "<!-- %s -->" % value
  end
end
|
||||
|
||||
# Tree builder that produces the plain-Ruby SimpleTree node classes.
class TreeBuilder < Base::TreeBuilder
  # Records which node classes are used for each kind of node.
  def initialize
    @fragmentClass = DocumentFragment
    @commentClass  = CommentNode
    @elementClass  = Element
    @doctypeClass  = DocumentType
    @documentClass = Document
  end

  # Serializes +node+ in the format produced by its printTree method.
  def testSerializer(node)
    node.printTree
  end

  # Stores the fragment returned by the superclass as the current document
  # and returns it.
  def get_fragment
    @document = super
  end
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
26
attic/vendor/plugins/HTML5lib/lib/html5/treewalkers.rb
vendored
Normal file
26
attic/vendor/plugins/HTML5lib/lib/html5/treewalkers.rb
vendored
Normal file
|
@ -0,0 +1,26 @@
|
|||
require 'html5/treewalkers/base'
|
||||
|
||||
module HTML5
  module TreeWalkers

    class << self
      # Returns the TreeWalker class registered under +name+ (compared
      # case-insensitively after to_s), requiring its implementation file
      # on first use. Raises a RuntimeError for any other name.
      def [](name)
        key = name.to_s.downcase
        if key == 'simpletree'
          require 'html5/treewalkers/simpletree'
          SimpleTree::TreeWalker
        elsif key == 'rexml'
          require 'html5/treewalkers/rexml'
          REXML::TreeWalker
        elsif key == 'hpricot'
          require 'html5/treewalkers/hpricot'
          Hpricot::TreeWalker
        else
          raise "Unknown TreeWalker #{name}"
        end
      end

      alias :get_tree_walker :[]
    end
  end
end
|
162
attic/vendor/plugins/HTML5lib/lib/html5/treewalkers/base.rb
vendored
Normal file
162
attic/vendor/plugins/HTML5lib/lib/html5/treewalkers/base.rb
vendored
Normal file
|
@ -0,0 +1,162 @@
|
|||
require 'html5/constants'
|
||||
module HTML5
|
||||
module TreeWalkers
|
||||
|
||||
# Helpers that build the token hashes emitted by tree walkers.
module TokenConstructor
  # Token reporting a serialization problem described by +msg+.
  def error(msg)
    {:type => "SerializeError", :data => msg}
  end

  # Converts +attrs+ to the array form stored in tag tokens.
  def normalize_attrs(attrs)
    attrs.to_a
  end

  # Token for a void element.
  def empty_tag(name, attrs, has_children=false)
    # NOTE(review): the error token built here is neither returned nor
    # yielded, so it is discarded -- confirm whether that is intended.
    error(_("Void element has children")) if has_children
    {
      :type => :EmptyTag,
      :name => name,
      :data => normalize_attrs(attrs)
    }
  end

  # Token for an opening tag.
  def start_tag(name, attrs)
    {
      :type => :StartTag,
      :name => name,
      :data => normalize_attrs(attrs)
    }
  end

  # Token for a closing tag; its data is always an empty array.
  def end_tag(name)
    {
      :type => :EndTag,
      :name => name,
      :data => []
    }
  end

  # Splits +data+ into up to three tokens yielded in order: a leading
  # :SpaceCharacters run, the :Characters body and a trailing
  # :SpaceCharacters run. Input made only of space characters yields a
  # single :SpaceCharacters token.
  def text(data)
    space_run = "[#{SPACE_CHARACTERS.join('')}]+"

    if data =~ /\A(#{space_run})/m
      leading = Regexp.last_match(1)
      yield({:type => :SpaceCharacters, :data => leading})
      data = data[leading.length .. -1]
      return if data.empty?
    end

    if data =~ /(#{space_run})\Z/m
      trailing = Regexp.last_match(1)
      yield({:type => :Characters, :data => data[0 ... -trailing.length]})
      yield({:type => :SpaceCharacters, :data => trailing})
    else
      yield({:type => :Characters, :data => data})
    end
  end

  # Token for a comment.
  def comment(data)
    {:type => :Comment, :data => data}
  end

  # Token for a doctype.
  def doctype(name, public_id, system_id, correct=nil)
    {
      :type      => :Doctype,
      :name      => name,
      :public_id => public_id,
      :system_id => system_id,
      :correct   => correct
    }
  end

  # Error token for a node type the walker does not recognise.
  def unknown(nodeType)
    error(_("Unknown node type: ") + nodeType.to_s)
  end

  # Message hook; returns +str+ unchanged.
  def _(str)
    str
  end
end
|
||||
|
||||
# Common base for tree walkers: holds the tree and defines the iteration
# interface that concrete walkers implement.
class Base
  include TokenConstructor

  # +tree+ is the root object the walker will traverse.
  def initialize(tree)
    @tree = tree
  end

  # Yields the tokens of the tree; concrete walkers must override this.
  def each
    raise NotImplementedError
  end

  # Same as #each. Defined as a delegating method rather than with
  # `alias`: an alias would stay bound to this class's #each, so calling
  # #walk on a subclass that overrides #each would still raise
  # NotImplementedError.
  def walk(&block)
    each(&block)
  end

  # Collects every token yielded by #each into an array.
  def to_ary
    tokens = []
    each { |token| tokens << token }
    tokens
  end
end
|
||||
|
||||
# Walks a tree iteratively (no recursion), relying on four hooks that a
# concrete subclass supplies: node_details, first_child, next_sibling and
# parent.
class NonRecursiveTreeWalker < TreeWalkers::Base
  # Returns an array whose first item is the node kind (:DOCTYPE, :TEXT,
  # :ELEMENT, :COMMENT, :DOCUMENT, :DOCUMENT_FRAGMENT, nil to skip, or
  # anything else for "unknown") followed by the kind-specific details.
  def node_details(node)
    raise NotImplementedError
  end

  def first_child(node)
    raise NotImplementedError
  end

  def next_sibling(node)
    raise NotImplementedError
  end

  def parent(node)
    raise NotImplementedError
  end

  # Yields the token stream for @tree in document order.
  def each
    node = @tree
    while node != nil
      details = node_details(node)
      descend = false

      kind = details.shift
      case kind
      when :DOCTYPE
        yield doctype(*details)

      when :TEXT
        text(*details) { |token| yield token }

      when :ELEMENT
        tag_name, attrs, descend = details
        if VOID_ELEMENTS.include?(tag_name)
          yield empty_tag(tag_name, attrs.to_a, descend)
          # Void elements are never descended into.
          descend = false
        else
          yield start_tag(tag_name, attrs.to_a)
        end

      when :COMMENT
        yield comment(details[0])

      when :DOCUMENT, :DOCUMENT_FRAGMENT
        descend = true

      when nil
        # ignore (REXML::XMLDecl is an example)

      else
        yield unknown(details[0])
      end

      child = descend ? first_child(node) : nil
      if child != nil
        node = child
      else
        # No children to visit: close elements while climbing until a
        # node with a next sibling is found or the root is reached.
        while node != nil
          details = node_details(node)
          if details.shift == :ELEMENT
            tag_name = details[0]
            yield end_tag(tag_name) if !VOID_ELEMENTS.include?(tag_name)
          end

          if @tree == node
            node = nil
          else
            sibling = next_sibling(node)
            if sibling != nil
              node = sibling
              break
            end

            node = parent(node)
          end
        end
      end
    end
  end
end
|
||||
|
||||
end
|
||||
end
|
48
attic/vendor/plugins/HTML5lib/lib/html5/treewalkers/hpricot.rb
vendored
Normal file
48
attic/vendor/plugins/HTML5lib/lib/html5/treewalkers/hpricot.rb
vendored
Normal file
|
@ -0,0 +1,48 @@
|
|||
require 'html5/treewalkers/base'
|
||||
require 'rexml/document'
|
||||
|
||||
module HTML5
|
||||
module TreeWalkers
|
||||
module Hpricot
|
||||
# Tree walker over Hpricot nodes.
class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker

  # Maps an Hpricot node to the [kind, *details] array consumed by the
  # non-recursive walker. An Elem with an empty name is reported as a
  # document fragment; an XMLDecl is reported as [nil].
  def node_details(node)
    case node
    when ::Hpricot::Elem
      return [:DOCUMENT_FRAGMENT] if node.name.empty?

      [
        :ELEMENT,
        node.name,
        node.attributes.map { |attr_name, attr_value| [attr_name, attr_value] },
        !node.empty?
      ]
    when ::Hpricot::Text    then [:TEXT, node.content]
    when ::Hpricot::Comment then [:COMMENT, node.content]
    when ::Hpricot::Doc     then [:DOCUMENT]
    when ::Hpricot::DocType then [:DOCTYPE, node.target, node.public_id, node.system_id]
    when ::Hpricot::XMLDecl then [nil]
    else
      [:UNKNOWN, node.class.inspect]
    end
  end

  def first_child(node)
    node.children.first
  end

  def next_sibling(node)
    node.next_node
  end

  def parent(node)
    node.parent
  end
end
|
||||
end
|
||||
end
|
||||
end
|
48
attic/vendor/plugins/HTML5lib/lib/html5/treewalkers/rexml.rb
vendored
Normal file
48
attic/vendor/plugins/HTML5lib/lib/html5/treewalkers/rexml.rb
vendored
Normal file
|
@ -0,0 +1,48 @@
|
|||
require 'html5/treewalkers/base'
|
||||
require 'rexml/document'
|
||||
|
||||
module HTML5
|
||||
module TreeWalkers
|
||||
module REXML
|
||||
# Tree walker over REXML nodes.
class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker

  # Maps a REXML node to the [kind, *details] array consumed by the
  # non-recursive walker. Document is tested before Element; an Element
  # without a name is reported as a document fragment; an XMLDecl is
  # reported as [nil].
  def node_details(node)
    case node
    when ::REXML::Document
      [:DOCUMENT]
    when ::REXML::Element
      if node.name
        [
          :ELEMENT,
          node.name,
          node.attributes.map { |attr_name, attr_value| [attr_name, attr_value] },
          node.has_elements? || node.has_text?
        ]
      else
        [:DOCUMENT_FRAGMENT]
      end
    when ::REXML::Text    then [:TEXT, node.value]
    when ::REXML::Comment then [:COMMENT, node.string]
    when ::REXML::DocType then [:DOCTYPE, node.name, node.public, node.system]
    when ::REXML::XMLDecl then [nil]
    else
      [:UNKNOWN, node.class.inspect]
    end
  end

  def first_child(node)
    node.children.first
  end

  def next_sibling(node)
    node.next_sibling
  end

  def parent(node)
    node.parent
  end
end
|
||||
end
|
||||
end
|
||||
end
|
48
attic/vendor/plugins/HTML5lib/lib/html5/treewalkers/simpletree.rb
vendored
Normal file
48
attic/vendor/plugins/HTML5lib/lib/html5/treewalkers/simpletree.rb
vendored
Normal file
|
@ -0,0 +1,48 @@
|
|||
require 'html5/treewalkers/base'
|
||||
|
||||
module HTML5
|
||||
module TreeWalkers
|
||||
module SimpleTree
|
||||
# Recursive tree walker over SimpleTree nodes.
class TreeWalker < HTML5::TreeWalkers::Base
  include HTML5::TreeBuilders::SimpleTree

  # Yields the tokens for +node+ and, for non-void elements, for all of
  # its descendants. Document and DocumentFragment nodes yield nothing.
  # A node of any other class yields an "unknown node type" error token
  # (the stray debugging `puts` that used to precede it has been removed
  # so the walker no longer writes to stdout).
  def walk(node)
    case node
    when Document, DocumentFragment
      return

    when DocumentType
      yield doctype(node.name, node.public_id, node.system_id)

    when TextNode
      text(node.value) { |token| yield token }

    when Element
      if VOID_ELEMENTS.include?(node.name)
        yield empty_tag(node.name, node.attributes, node.hasContent())
      else
        yield start_tag(node.name, node.attributes)
        node.childNodes.each do |child|
          walk(child) { |token| yield token }
        end
        yield end_tag(node.name)
      end

    when CommentNode
      yield comment(node.value)

    else
      yield unknown(node.class)
    end
  end

  # Yields the tokens of every child of the root tree, in order.
  def each
    @tree.childNodes.each do |child|
      walk(child) { |token| yield token }
    end
  end
end
|
||||
end
|
||||
end
|
||||
end
|
3
attic/vendor/plugins/HTML5lib/lib/html5/version.rb
vendored
Normal file
3
attic/vendor/plugins/HTML5lib/lib/html5/version.rb
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
module HTML5
  # Library version string; frozen so the constant cannot be mutated in place.
  VERSION = '0.10.1'.freeze
end
|
Loading…
Add table
Add a link
Reference in a new issue