a6429f8c22
Completely removed the html5lib sanitizer. Fixed the string-handling to work in both Ruby 1.8.x and 1.9.2. There are still, inexplicably, two functional tests that fail. But the rest seems to work quite well.
831 lines
32 KiB
Ruby
Executable file
831 lines
32 KiB
Ruby
Executable file
# HTML 5 conformance checker
#
# Warning: this module is experimental, incomplete, and subject to removal at any time.
#
# Usage (NOTE: the sample session below is written in Python/html5lib syntax,
# not Ruby; it illustrates the intended flow only):
# >>> from html5lib.html5parser import HTMLParser
# >>> from html5lib.filters.validator import HTMLConformanceChecker
# >>> p = HTMLParser(tokenizer=HTMLConformanceChecker)
# >>> p.parse('<!doctype html>\n<html foo=bar></html>')
# <<class 'html5lib.treebuilders.simpletree.Document'> None>
# >>> p.errors
# [((2, 14), 'unknown-attribute', {'attributeName': u'foo', 'tagName': u'html'})]
|
|
|
|
require 'html5/constants'
|
|
require 'html5/filters/base'
|
|
require 'html5/filters/iso639codes'
|
|
require 'html5/filters/rfc3987'
|
|
require 'html5/filters/rfc2046'
|
|
|
|
# Gettext-style marker for translatable message strings.
# Returns +str+ unchanged.
def _(str)
  str
end
|
|
|
|
class String
  # Converts CamelCase, namespaced ("A::B") or dashed text into a lower-case,
  # underscore-separated form (adapted from Rails).
  def underscore
    snake = gsub(/::/, '/')
    snake = snake.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
    snake = snake.gsub(/([a-z\d])([A-Z])/, '\1_\2')
    snake.tr("-", "_").downcase
  end
end
|
|
|
|
# Registers the conformance checker's error codes and their message templates
# in the shared HTML5::E table. Placeholders use the "%(name)" form and are
# filled from each error token's :datavars.
HTML5::E.update({
  "unknown-start-tag" =>
    _("Unknown start tag <%(tagName)>."),
  "unknown-attribute" =>
    _("Unknown '%(attributeName)' attribute on <%(tagName)>."),
  "missing-required-attribute" =>
    _("The '%(attributeName)' attribute is required on <%(tagName)>."),
  "unknown-input-type" =>
    _("Illegal value for attribute on <input type='%(inputType)'>."),
  "attribute-not-allowed-on-this-input-type" =>
    _("The '%(attributeName)' attribute is not allowed on <input type=%(inputType)>."),
  "deprecated-attribute" =>
    _("This attribute is deprecated: '%(attributeName)' attribute on <%(tagName)>."),
  "duplicate-value-in-token-list" =>
    _("Duplicate value in token list: '%(attributeValue)' in '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-attribute-value" =>
    _("Invalid attribute value: '%(attributeName)' attribute on <%(tagName)>."),
  "space-in-id" =>
    _("Whitespace is not allowed here: '%(attributeName)' attribute on <%(tagName)>."),
  "duplicate-id" =>
    _("This ID was already defined earlier: 'id' attribute on <%(tagName)>."),
  "attribute-value-can-not-be-blank" =>
    _("This value can not be blank: '%(attributeName)' attribute on <%(tagName)>."),
  "id-does-not-exist" =>
    _("This value refers to a non-existent ID: '%(attributeName)' attribute on <%(tagName)>."),
  # The next three templates previously read "<%tagName)>" (missing "("),
  # which left the tag name un-substituted.
  "invalid-enumerated-value" =>
    _("Value must be one of %(enumeratedValues): '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-boolean-value" =>
    _("Value must be one of %(enumeratedValues): '%(attributeName)' attribute on <%(tagName)>."),
  "contextmenu-must-point-to-menu" =>
    _("The contextmenu attribute must point to an ID defined on a <menu> element."),
  "invalid-lang-code" =>
    _("Invalid language code: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-integer-value" =>
    _("Value must be an integer: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-root-namespace" =>
    _("Root namespace must be 'http://www.w3.org/1999/xhtml', or omitted."),
  "invalid-browsing-context" =>
    _("Value must be one of ('_self', '_parent', '_top'), or a name that does not start with '_': '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-tag-uri" =>
    _("Invalid URI: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-urn" =>
    _("Invalid URN: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-uri-char" =>
    _("Illegal character in URI: '%(attributeName)' attribute on <%(tagName)>."),
  "uri-not-iri" =>
    _("Expected a URI but found an IRI: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-uri" =>
    _("Invalid URI: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-http-or-ftp-uri" =>
    _("Invalid URI: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-scheme" =>
    _("Unregistered URI scheme: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-rel" =>
    _("Invalid link relation: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-mime-type" =>
    _("Invalid MIME type: '%(attributeName)' attribute on <%(tagName)>."),
})
|
|
|
|
|
|
class HTMLConformanceChecker < HTML5::Filters::Base
|
|
|
|
# Attribute names accepted on every element: core attributes first, then
# the event-handler attributes.
@@global_attributes = %w[
  class contenteditable contextmenu dir draggable id irrelevant lang ref
  tabindex template title
  onabort onbeforeunload onblur onchange onclick oncontextmenu ondblclick
  ondrag ondragend ondragenter ondragleave ondragover ondragstart ondrop
  onerror onfocus onkeydown onkeypress onkeyup onload onmessage
  onmousedown onmousemove onmouseout onmouseover onmouseup onmousewheel
  onresize onscroll onselect onsubmit onunload
]
|
|
# XXX lang in HTML only, xml:lang in XHTML only
|
|
# XXX validate ref, template
|
|
|
|
# Element name => element-specific attribute names (global attributes are
# listed separately). The key set also serves as the list of known tag names.
@@allowed_attribute_map = {
  'html'         => %w[xmlns],
  'head'         => [],
  'title'        => [],
  'base'         => %w[href target],
  'link'         => %w[href rel media hreflang type],
  'meta'         => %w[name http-equiv content charset], # XXX charset in HTML only
  'style'        => %w[media type scoped],
  'body'         => [],
  'section'      => [],
  'nav'          => [],
  'article'      => [],
  'blockquote'   => %w[cite],
  'aside'        => [],
  'h1'           => [],
  'h2'           => [],
  'h3'           => [],
  'h4'           => [],
  'h5'           => [],
  'h6'           => [],
  'header'       => [],
  'footer'       => [],
  'address'      => [],
  'p'            => [],
  'hr'           => [],
  'br'           => [],
  'dialog'       => [],
  'pre'          => [],
  'ol'           => %w[start],
  'ul'           => [],
  'li'           => %w[value], # XXX depends on parent
  'dl'           => [],
  'dt'           => [],
  'dd'           => [],
  'a'            => %w[href target ping rel media hreflang type],
  'q'            => %w[cite],
  'cite'         => [],
  'em'           => [],
  'strong'       => [],
  'small'        => [],
  'm'            => [],
  'dfn'          => [],
  'abbr'         => [],
  'time'         => %w[datetime],
  'meter'        => %w[value min low high max optimum],
  'progress'     => %w[value max],
  'code'         => [],
  'var'          => [],
  'samp'         => [],
  'kbd'          => [],
  'sup'          => [],
  'sub'          => [],
  'span'         => [],
  'i'            => [],
  'b'            => [],
  'bdo'          => [],
  'ins'          => %w[cite datetime],
  'del'          => %w[cite datetime],
  'figure'       => [],
  'img'          => %w[alt src usemap ismap height width], # XXX ismap depends on parent
  'iframe'       => %w[src],
  # <embed> handled separately
  'object'       => %w[data type usemap height width],
  'param'        => %w[name value],
  'video'        => %w[src autoplay start loopstart loopend end loopcount controls],
  'audio'        => %w[src autoplay start loopstart loopend end loopcount controls],
  'source'       => %w[src type media],
  'canvas'       => %w[height width],
  'map'          => [],
  'area'         => %w[alt coords shape href target ping rel media hreflang type],
  'table'        => [],
  'caption'      => [],
  'colgroup'     => %w[span], # XXX only if element contains no <col> elements
  'col'          => %w[span],
  'tbody'        => [],
  'thead'        => [],
  'tfoot'        => [],
  'tr'           => [],
  'td'           => %w[colspan rowspan],
  'th'           => %w[colspan rowspan scope],
  # all possible <input> attributes are listed here but <input> is really handled separately
  'input'        => %w[accept accesskey action alt autocomplete autofocus checked
                       disabled enctype form inputmode list maxlength method min
                       max name pattern step readonly replace required size src
                       tabindex target template value],
  'form'         => %w[action method enctype accept name onsubmit onreset accept-charset
                       data replace],
  # XXX may need matrix of acceptable attributes based on value of type attribute (like input)
  'button'       => %w[action enctype method replace template name value type disabled form autofocus],
  'select'       => %w[name size multiple disabled data accesskey form autofocus],
  'optgroup'     => %w[disabled label],
  'option'       => %w[selected disabled label value],
  'textarea'     => %w[maxlength name rows cols disabled readonly required form autofocus wrap accept],
  'label'        => %w[for accesskey form],
  'fieldset'     => %w[disabled form],
  'output'       => %w[form name for onforminput onformchange],
  'datalist'     => %w[data],
  # XXX repetition model for repeating form controls
  'script'       => %w[src defer async type],
  'noscript'     => [],
  'noembed'      => [],
  'event-source' => %w[src],
  'details'      => %w[open],
  'datagrid'     => %w[multiple disabled],
  'command'      => %w[type label icon hidden disabled checked radiogroup default],
  'menu'         => %w[type label autosubmit],
  'datatemplate' => [],
  'rule'         => [],
  'nest'         => [],
  'legend'       => [],
  'div'          => [],
  'font'         => %w[style]
}
|
|
|
|
# Element name => attribute names that must be present on its start tag.
@@required_attribute_map = {
  'link'   => %w[href rel],
  'bdo'    => %w[dir],
  'img'    => %w[src],
  'embed'  => %w[src],
  'object' => [], # XXX one of 'data' or 'type' is required
  'param'  => %w[name value],
  'source' => %w[src],
  'map'    => %w[id]
}
|
|
|
|
# <input> "type" value => attribute names permitted for that type.
# The key set doubles as the list of recognised input types.
@@input_type_allowed_attribute_map = {
  'text'           => %w[accesskey autocomplete autofocus disabled form inputmode list maxlength
                         name pattern readonly required size tabindex value],
  'password'       => %w[accesskey autocomplete autofocus disabled form inputmode maxlength
                         name pattern readonly required size tabindex value],
  'checkbox'       => %w[accesskey autofocus checked disabled form name required tabindex value],
  'radio'          => %w[accesskey autofocus checked disabled form name required tabindex value],
  'button'         => %w[accesskey autofocus disabled form name tabindex value],
  'submit'         => %w[accesskey action autofocus disabled enctype form method name replace
                         tabindex target value],
  'reset'          => %w[accesskey autofocus disabled form name tabindex value],
  'add'            => %w[accesskey autofocus disabled form name tabindex template value],
  'remove'         => %w[accesskey autofocus disabled form name tabindex value],
  'move-up'        => %w[accesskey autofocus disabled form name tabindex value],
  'move-down'      => %w[accesskey autofocus disabled form name tabindex value],
  'file'           => %w[accept accesskey autofocus disabled form min max name required tabindex],
  'hidden'         => %w[disabled form name value],
  'image'          => %w[accesskey action alt autofocus disabled enctype form method name replace
                         src tabindex target],
  'datetime'       => %w[accesskey autocomplete autofocus disabled form list min max name step
                         readonly required tabindex value],
  'datetime-local' => %w[accesskey autocomplete autofocus disabled form list min max name step
                         readonly required tabindex value],
  'date'           => %w[accesskey autocomplete autofocus disabled form list min max name step
                         readonly required tabindex value],
  'month'          => %w[accesskey autocomplete autofocus disabled form list min max name step
                         readonly required tabindex value],
  'week'           => %w[accesskey autocomplete autofocus disabled form list min max name step
                         readonly required tabindex value],
  'time'           => %w[accesskey autocomplete autofocus disabled form list min max name step
                         readonly required tabindex value],
  'number'         => %w[accesskey autocomplete autofocus disabled form list min max name step
                         readonly required tabindex value],
  'range'          => %w[accesskey autocomplete autofocus disabled form list min max name step
                         readonly required tabindex value],
  'email'          => %w[accesskey autocomplete autofocus disabled form inputmode list maxlength
                         name pattern readonly required tabindex value],
  'url'            => %w[accesskey autocomplete autofocus disabled form inputmode list maxlength
                         name pattern readonly required tabindex value],
}
|
|
|
|
# <input> "type" value => attribute names that are accepted but deprecated.
@@input_type_deprecated_attribute_map = {
  'text'     => ['size'],
  'password' => ['size']
}
|
|
|
|
# Link relation keywords accepted in rel="" on <link>.
@@link_rel_values = %w[
  alternate archive archives author contact feed first begin start help
  icon index top contents toc last end license copyright next pingback
  prefetch prev previous search stylesheet sidebar tag up
]
# Link relation keywords accepted in rel="" on every other element that is
# routed through check_link_relation.
@@a_rel_values = %w[
  alternate archive archives author contact feed first begin start help
  index top contents toc last end license copyright next prev previous
  search sidebar tag up bookmark external nofollow
]
|
|
|
|
# Builds an HTML5::HTMLTokenizer over +stream+ (extra arguments are passed
# through), hands it to the base filter, and resets the ID bookkeeping lists
# used by the whole-document checks.
def initialize(stream, *args)
  tokenizer = HTML5::HTMLTokenizer.new(stream, *args)
  super(tokenizer)
  @ids_we_have_known_and_loved = []
  @things_that_define_an_id = []
  @things_that_point_to_an_id = []
end
|
|
|
|
# Iterates over the wrapped token stream. For each token the most specific
# available validator is run ("validate_<type>_<name>", falling back to
# "validate_<type>"); any error tokens it emits are yielded BEFORE the
# token itself. After the stream ends, the whole-document checks in #eof run.
def each
  __getobj__.each do |token|
    type_part = token.fetch(:type, '-').to_s.underscore
    name_part = token.fetch(:name, '-').to_s.underscore
    candidates = ["validate_#{type_part}_#{name_part}", "validate_#{type_part}"]
    handler = candidates.find { |candidate| respond_to?(candidate) }
    send(handler, token) { |error| yield error } if handler
    yield token
  end
  eof { |error| yield error }
end
|
|
|
|
##########################################################################
|
|
# Start tag validation
|
|
##########################################################################
|
|
|
|
# Generic start-tag validation: runs, in order, the unknown-tag check, the
# required-attribute check, the unknown-attribute check and the per-attribute
# value checks, yielding every error token they produce.
def validate_start_tag(token)
  check_unknown_start_tag(token) { |error| yield error }
  check_start_tag_required_attributes(token) { |error| yield error }
  check_start_tag_unknown_attributes(token) { |error| yield error }
  check_attribute_values(token) { |error| yield error }
end
|
|
|
|
# Start-tag validation for <embed>. The spec allows "any attributes w/o
# namespace", so check_start_tag_unknown_attributes is deliberately not
# called; only required attributes and attribute values are checked.
def validate_start_tag_embed(token)
  check_start_tag_required_attributes(token) { |error| yield error }
  check_attribute_values(token) { |error| yield error }
end
|
|
|
|
# Start-tag validation for <input>, whose permitted attributes depend on the
# value of its "type" attribute (defaulting to "text").
#
# Yields ParseError tokens for: an unrecognised type, attributes unknown on
# <input> altogether, attributes known but not allowed for this type, and
# attributes deprecated for this type.
def validate_start_tag_input(token)
  check_attribute_values(token) { |error| yield error }

  # Lower-cased attribute name => value. Built pair by pair rather than via
  # Hash[*flatten], which would break on nested or odd-length data.
  attr_dict = {}
  (token[:data] || []).each do |name, value|
    attr_dict[name.downcase] = value
  end

  input_type = attr_dict.fetch('type', "text")
  unless @@input_type_allowed_attribute_map.key?(input_type)
    # "inputType" is the placeholder the unknown-input-type message uses;
    # :attrValue is retained for callers that read the old key.
    yield({:type => "ParseError",
           :data => "unknown-input-type",
           :datavars => {"tagName" => "input",
                         "inputType" => input_type,
                         :attrValue => input_type}})
  end

  allowed_attributes = @@input_type_allowed_attribute_map.fetch(input_type, [])
  deprecated_attributes = @@input_type_deprecated_attribute_map.fetch(input_type, [])
  attr_dict.each do |attr_name, attr_value|
    if !@@allowed_attribute_map['input'].include?(attr_name)
      yield({:type => "ParseError",
             :data => "unknown-attribute",
             :datavars => {"tagName" => "input",
                           "attributeName" => attr_name}})
    elsif !allowed_attributes.include?(attr_name)
      yield({:type => "ParseError",
             :data => "attribute-not-allowed-on-this-input-type",
             :datavars => {"attributeName" => attr_name,
                           "inputType" => input_type}})
    end
    if deprecated_attributes.include?(attr_name)
      # The deprecated-attribute message interpolates %(tagName).
      yield({:type => "ParseError",
             :data => "deprecated-attribute",
             :datavars => {"tagName" => "input",
                           "attributeName" => attr_name,
                           "inputType" => input_type}})
    end
  end
end
|
|
|
|
##########################################################################
|
|
# Start tag validation helpers
|
|
##########################################################################
|
|
|
|
# Yields an "unknown-start-tag" ParseError when the token's (lower-cased)
# tag name is not a key of @@allowed_attribute_map.
def check_unknown_start_tag(token)
  tag = (token[:name] || "").downcase
  return if @@allowed_attribute_map.key?(tag)
  yield({:type => "ParseError",
         :data => "unknown-start-tag",
         :datavars => {"tagName" => tag}})
end
|
|
|
|
# Yields one "missing-required-attribute" ParseError for every attribute that
# @@required_attribute_map demands for this tag but the token does not carry.
# Attribute names are compared case-insensitively, matching the other
# start-tag checks.
def check_start_tag_required_attributes(token)
  name = (token[:name] || "").downcase
  required = @@required_attribute_map[name]
  return unless required
  attrs_present = (token[:data] || []).collect { |pair| pair[0].downcase }
  required.each do |attr_name|
    next if attrs_present.include?(attr_name)
    yield({:type => "ParseError",
           :data => "missing-required-attribute",
           :datavars => {"tagName" => name,
                         "attributeName" => attr_name}})
  end
end
|
|
|
|
# Yields an "unknown-attribute" ParseError for each attribute on the token
# that is neither a global attribute nor listed for this tag in
# @@allowed_attribute_map. A missing tag name is treated as "" (as the
# sibling checks do) instead of raising.
def check_start_tag_unknown_attributes(token)
  name = (token[:name] || "").downcase
  allowed_attributes = @@global_attributes | @@allowed_attribute_map.fetch(name, [])
  (token[:data] || []).each do |attr_name, attr_value|
    next if allowed_attributes.include?(attr_name.downcase)
    yield({:type => "ParseError",
           :data => "unknown-attribute",
           :datavars => {"tagName" => name,
                         "attributeName" => attr_name}})
  end
end
|
|
|
|
##########################################################################
|
|
# Attribute validation helpers
|
|
##########################################################################
|
|
|
|
# def checkURI(token, tag_name, attr_name, attr_value)
|
|
# is_valid, error_code = rfc3987.is_valid_uri(attr_value)
|
|
# if not is_valid
|
|
# yield {:type => "ParseError",
|
|
# :data => error_code,
|
|
# :datavars => {"tagName" => tag_name,
|
|
# "attributeName" => attr_name}}
|
|
# yield {:type => "ParseError",
|
|
# :data => "invalid-attribute-value",
|
|
# :datavars => {"tagName" => tag_name,
|
|
# "attributeName" => attr_name}}
|
|
|
|
# Validates +attr_value+ with is_valid_iri. When invalid, yields two
# ParseErrors: first the specific error code returned by is_valid_iri, then
# the generic "invalid-attribute-value".
def check_iri(token, tag_name, attr_name, attr_value)
  valid, specific_code = is_valid_iri(attr_value)
  return if valid
  [specific_code, "invalid-attribute-value"].each do |code|
    yield({:type => "ParseError",
           :data => code,
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
  end
end
|
|
|
|
# Validates an ID-valued attribute.
#
# A nil or empty value yields "attribute-value-can-not-be-blank" and stops
# (previously a nil value went on to raise NoMethodError on #each_byte).
# Otherwise the first whitespace byte found yields "space-in-id" followed by
# "invalid-attribute-value".
def check_id(token, tag_name, attr_name, attr_value)
  if !attr_value || attr_value.length == 0
    yield({:type => "ParseError",
           :data => "attribute-value-can-not-be-blank",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
    return
  end
  # Byte-wise scan so the check behaves the same on Ruby 1.8 and 1.9+.
  attr_value.each_byte do |b|
    c = [b].pack('c*')
    next unless HTML5::SPACE_CHARACTERS.include?(c)
    yield({:type => "ParseError",
           :data => "space-in-id",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
    yield({:type => "ParseError",
           :data => "invalid-attribute-value",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
    break # report only once per attribute
  end
end
|
|
|
|
# Splits +value+ into tokens separated by any run of
# HTML5::SPACE_CHARACTERS, returning them as an array of strings.
# Works byte by byte; a trailing space is appended so the final token is
# flushed inside the loop.
def parse_token_list(value)
  tokens = []
  pending = ''
  (value + ' ').each_byte do |byte|
    char = [byte].pack('c*')
    unless HTML5::SPACE_CHARACTERS.include?(char)
      pending += char
      next
    end
    next if pending.length == 0
    tokens << pending
    pending = ''
  end
  tokens << pending if pending.length > 0
  tokens
end
|
|
|
|
# Checks a space-separated token list attribute (a "set of tokens", see
# http://www.whatwg.org/specs/web-apps/current-work/#set-of) for repeated
# entries. Yields a single "duplicate-value-in-token-list" ParseError for the
# first repeated value and then stops.
def check_token_list(tag_name, attr_name, attr_value)
  seen = {}
  parse_token_list(attr_value).each do |entry|
    if seen.has_key?(entry)
      yield({:type => "ParseError",
             :data => "duplicate-value-in-token-list",
             :datavars => {"tagName" => tag_name,
                           "attributeName" => attr_name,
                           "attributeValue" => entry}})
      break
    end
    seen[entry] = 1
  end
end
|
|
|
|
# Validates an enumerated attribute.
#
# token             - the tokenizer token (unused here)
# enumerated_values - array of permitted lower-case values
#
# A nil/empty value yields "attribute-value-can-not-be-blank". Otherwise the
# value is compared case-insensitively; a mismatch yields
# "invalid-enumerated-value" followed by "invalid-attribute-value".
# The caller's string is never modified.
def check_enumerated_value(token, tag_name, attr_name, attr_value, enumerated_values)
  if !attr_value || attr_value.length == 0
    yield({:type => "ParseError",
           :data => "attribute-value-can-not-be-blank",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
    return
  end
  # Compare a lower-cased copy: downcase! would rewrite the attribute value
  # inside the token stream and raises on frozen strings.
  normalized = attr_value.downcase
  return if enumerated_values.include?(normalized)
  yield({:type => "ParseError",
         :data => "invalid-enumerated-value",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name, # key the message template expects
                       "enumeratedValues" => enumerated_values}})
  yield({:type => "ParseError",
         :data => "invalid-attribute-value",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name}})
end
|
|
|
|
# Validates a boolean attribute: the only accepted values are the attribute's
# own name and the empty string. Anything else yields "invalid-boolean-value"
# followed by "invalid-attribute-value".
def check_boolean(token, tag_name, attr_name, attr_value)
  permitted = [attr_name, '']
  return if permitted.include?(attr_value)
  yield({:type => "ParseError",
         :data => "invalid-boolean-value",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name,
                       "enumeratedValues" => permitted}})
  yield({:type => "ParseError",
         :data => "invalid-attribute-value",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name}})
end
|
|
|
|
# Validates an integer-valued attribute with a small state machine:
#   'begin'          - skipping leading whitespace; expects '-' or a digit
#   'initial-number' - just saw '-'; the next character must be a digit
#   'in-number'      - consuming digits; anything else starts trailing junk
#   'trailing-junk'  - the rest of the value is ignored
# Yields "invalid-integer-value" on the first offending character, or
# "attribute-value-can-not-be-blank" when no digit was found at all.
def check_integer(token, tag_name, attr_name, attr_value)
  digits_seen = ''
  state = 'begin'
  invalid = {:type => "ParseError",
             :data => "invalid-integer-value",
             :datavars => {"tagName" => tag_name,
                           "attributeName" => attr_name,
                           "attributeValue" => attr_value}}
  attr_value.scan(/./) do |char|
    case state
    when 'begin'
      if HTML5::SPACE_CHARACTERS.include?(char)
        next
      elsif char == '-'
        state = 'initial-number'
      elsif HTML5::DIGITS.include?(char)
        digits_seen += char
        state = 'in-number'
      else
        yield invalid
        return
      end
    when 'initial-number'
      unless HTML5::DIGITS.include?(char)
        yield invalid
        return
      end
      digits_seen += char
      state = 'in-number'
    when 'in-number'
      if HTML5::DIGITS.include?(char)
        digits_seen += char
      else
        state = 'trailing-junk'
      end
    end
  end
  if digits_seen.length == 0
    yield({:type => "ParseError",
           :data => "attribute-value-can-not-be-blank",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
  end
end
|
|
|
|
# Placeholder for floating-point attribute validation.
# XXX not implemented: currently accepts every value and yields nothing.
def check_floating_point_number(token, tag_name, attr_name, attr_value)
end
|
|
|
|
# Validates a browsing-context name (e.g. a "target" attribute).
# Names that do not start with "_" are always accepted; names that do must be
# one of _self, _parent, _top or _blank (case-insensitive). Anything else
# yields "invalid-browsing-context". The caller's string is never modified.
def check_browsing_context(token, tag_name, attr_name, attr_value)
  return if not attr_value
  # [0, 1] returns a one-character string on both Ruby 1.8 and 1.9+.
  return if attr_value[0, 1] != '_'
  # Lower-case a copy; downcase! would rewrite the token's attribute value
  # and raises on frozen strings.
  keyword = attr_value.downcase
  return if ['_self', '_parent', '_top', '_blank'].include?(keyword)
  yield({:type => "ParseError",
         :data => "invalid-browsing-context",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name}})
end
|
|
|
|
# Validates a language-code attribute via is_valid_lang_code.
# A nil or empty value is accepted; an invalid code yields
# "invalid-lang-code".
def check_lang_code(token, tag_name, attr_name, attr_value)
  return if !attr_value || attr_value == '' # blank is OK
  return if is_valid_lang_code(attr_value)
  yield({:type => "ParseError",
         :data => "invalid-lang-code",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name,
                       "attributeValue" => attr_value}})
end
|
|
|
|
# Validates a MIME-type attribute via is_valid_mime_type.
# A nil value yields "attribute-value-can-not-be-blank" and stops (the value
# is no longer passed on to is_valid_mime_type); an invalid type yields
# "invalid-mime-type".
# XXX needs tests
def check_mime_type(token, tag_name, attr_name, attr_value)
  if not attr_value
    yield({:type => "ParseError",
           :data => "attribute-value-can-not-be-blank",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
    return
  end
  if not is_valid_mime_type(attr_value)
    yield({:type => "ParseError",
           :data => "invalid-mime-type",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name,
                         "attributeValue" => attr_value}})
  end
end
|
|
|
|
# Placeholder for media-query attribute validation.
# XXX not implemented: currently accepts every value and yields nothing.
def check_media_query(token, tag_name, attr_name, attr_value)
end
|
|
|
|
# Validates a rel="" attribute: first reports duplicate tokens (via
# check_token_list), then yields "invalid-rel" for every token that is not in
# the allowed list (@@link_rel_values for <link>, @@a_rel_values otherwise).
def check_link_relation(token, tag_name, attr_name, attr_value)
  check_token_list(tag_name, attr_name, attr_value) { |error| yield error }
  relations = parse_token_list(attr_value)
  permitted = (tag_name == 'link') ? @@link_rel_values : @@a_rel_values
  relations.each do |relation|
    next if permitted.include?(relation)
    yield({:type => "ParseError",
           :data => "invalid-rel",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
  end
end
|
|
|
|
# Placeholder for date/time attribute validation.
# XXX not implemented: no value is inspected and nothing is yielded. The
# intended design is a character-by-character state machine (skip leading
# whitespace, then consume digits, ...), of which only the initial state
# assignment exists so far.
def check_date_time(token, tag_name, attr_name, attr_value)
  state = 'begin' # ('begin', '...
end
|
|
|
|
##########################################################################
|
|
# Attribute validation
|
|
##########################################################################
|
|
|
|
# Runs the value validator for every attribute on the token. The validator
# is looked up by name: "validate_attribute_value_<tag>_<attr>" first, then
# the tag-independent "validate_attribute_value_<attr>". Attributes without
# a matching validator are skipped. Every error a validator emits is yielded.
def check_attribute_values(token)
  tag_name = token.fetch(:name, "")
  tag_part = tag_name.to_s.underscore
  token.fetch(:data, []).each do |attr_name, attr_value|
    attr_name = attr_name.downcase
    attr_part = attr_name.to_s.underscore
    candidates = ["validate_attribute_value_#{tag_part}_#{attr_part}",
                  "validate_attribute_value_#{attr_part}"]
    validator = candidates.find { |candidate| respond_to?(candidate) }
    next unless validator
    send(validator, token, tag_name, attr_name, attr_value) { |error| yield error }
  end
end
|
|
|
|
# Validates class="": each duplicate-token error from check_token_list is
# passed through and followed by a generic "invalid-attribute-value" error.
def validate_attribute_value_class(token, tag_name, attr_name, attr_value)
  check_token_list(tag_name, attr_name, attr_value) do |duplicate_error|
    yield duplicate_error
    yield({:type => "ParseError",
           :data => "invalid-attribute-value",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
  end
end
|
|
|
|
# contenteditable is an enumerated attribute: 'true', 'false' or ''.
def validate_attribute_value_contenteditable(token, tag_name, attr_name, attr_value)
  permitted = ['true', 'false', '']
  check_enumerated_value(token, tag_name, attr_name, attr_value, permitted) { |error| yield error }
end
|
|
|
|
# dir is an enumerated attribute: 'ltr' or 'rtl'.
def validate_attribute_value_dir(token, tag_name, attr_name, attr_value)
  permitted = ['ltr', 'rtl']
  check_enumerated_value(token, tag_name, attr_name, attr_value, permitted) { |error| yield error }
end
|
|
|
|
# draggable is an enumerated attribute: 'true' or 'false'.
def validate_attribute_value_draggable(token, tag_name, attr_name, attr_value)
  permitted = ['true', 'false']
  check_enumerated_value(token, tag_name, attr_name, attr_value, permitted) { |error| yield error }
end
|
|
|
|
# Global attributes whose validation is exactly one of the generic checks.
alias_method :validate_attribute_value_irrelevant, :check_boolean
alias_method :validate_attribute_value_lang, :check_lang_code
|
|
|
|
# Validates contextmenu="" as an ID value and remembers the token so that
# #eof can later verify the referenced ID exists.
def validate_attribute_value_contextmenu(token, tag_name, attr_name, attr_value)
  check_id(token, tag_name, attr_name, attr_value) { |error| yield error }
  @things_that_point_to_an_id << token
end
|
|
|
|
# Validates id="" and records it.
#
# Side effects: the value is appended to @ids_we_have_known_and_loved and the
# token to @things_that_define_an_id, so that later checks can detect
# duplicate IDs and references (such as contextmenu) to IDs that do not
# exist. A value already recorded yields "duplicate-id". Nothing is recorded
# when the value is nil.
def validate_attribute_value_id(token, tag_name, attr_name, attr_value)
  check_id(token, tag_name, attr_name, attr_value) { |error| yield error }
  return if not attr_value
  already_defined = @ids_we_have_known_and_loved.include?(attr_value)
  if already_defined
    yield({:type => "ParseError",
           :data => "duplicate-id",
           :datavars => {"tagName" => tag_name}})
  end
  @ids_we_have_known_and_loved << attr_value
  @things_that_define_an_id << token
end
|
|
|
|
# tabindex must be an integer.
alias_method :validate_attribute_value_tabindex, :check_integer
|
|
|
|
# Placeholder for validating the "ref" attribute.
# XXX not implemented: accepts every value and yields nothing.
def validate_attribute_value_ref(token, tag_name, attr_name, attr_value)
end
|
|
|
|
# Placeholder for validating the "template" attribute.
# XXX not implemented: accepts every value and yields nothing.
def validate_attribute_value_template(token, tag_name, attr_name, attr_value)
end
|
|
|
|
# <html xmlns="..."> must name the XHTML namespace exactly; any other value
# yields "invalid-root-namespace".
def validate_attribute_value_html_xmlns(token, tag_name, attr_name, attr_value)
  return if attr_value == "http://www.w3.org/1999/xhtml"
  yield({:type => "ParseError",
         :data => "invalid-root-namespace",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name}})
end
|
|
|
|
# Element-specific attributes (<base> through <a target>) whose validation is
# exactly one of the generic checks.
alias_method :validate_attribute_value_base_href, :check_iri
alias_method :validate_attribute_value_base_target, :check_browsing_context
alias_method :validate_attribute_value_link_href, :check_iri
alias_method :validate_attribute_value_link_rel, :check_link_relation
alias_method :validate_attribute_value_link_media, :check_media_query
alias_method :validate_attribute_value_link_hreflang, :check_lang_code
alias_method :validate_attribute_value_link_type, :check_mime_type
# XXX <meta> attributes
alias_method :validate_attribute_value_style_media, :check_media_query
alias_method :validate_attribute_value_style_type, :check_mime_type
alias_method :validate_attribute_value_style_scoped, :check_boolean
alias_method :validate_attribute_value_blockquote_cite, :check_iri
alias_method :validate_attribute_value_ol_start, :check_integer
alias_method :validate_attribute_value_li_value, :check_integer
# XXX need tests from here on
alias_method :validate_attribute_value_a_href, :check_iri
alias_method :validate_attribute_value_a_target, :check_browsing_context
|
|
|
|
# Validates <a ping="...">: the value is a space-separated list of URLs, and
# each entry is checked individually with check_iri. (Previously this called
# the undefined method checkIRI and passed the whole attribute value for
# every entry.)
def validate_attribute_value_a_ping(token, tag_name, attr_name, attr_value)
  parse_token_list(attr_value).each do |current_value|
    check_iri(token, tag_name, attr_name, current_value) { |error| yield error }
  end
end
|
|
|
|
# Element-specific attributes (<a rel> through <del>) whose validation is
# exactly one of the generic checks.
alias_method :validate_attribute_value_a_rel, :check_link_relation
alias_method :validate_attribute_value_a_media, :check_media_query
alias_method :validate_attribute_value_a_hreflang, :check_lang_code
alias_method :validate_attribute_value_a_type, :check_mime_type
alias_method :validate_attribute_value_q_cite, :check_iri
alias_method :validate_attribute_value_time_datetime, :check_date_time
alias_method :validate_attribute_value_meter_value, :check_floating_point_number
alias_method :validate_attribute_value_meter_min, :check_floating_point_number
alias_method :validate_attribute_value_meter_low, :check_floating_point_number
alias_method :validate_attribute_value_meter_high, :check_floating_point_number
alias_method :validate_attribute_value_meter_max, :check_floating_point_number
alias_method :validate_attribute_value_meter_optimum, :check_floating_point_number
alias_method :validate_attribute_value_progress_value, :check_floating_point_number
alias_method :validate_attribute_value_progress_max, :check_floating_point_number
alias_method :validate_attribute_value_ins_cite, :check_iri
alias_method :validate_attribute_value_ins_datetime, :check_date_time
alias_method :validate_attribute_value_del_cite, :check_iri
alias_method :validate_attribute_value_del_datetime, :check_date_time
|
|
|
|
##########################################################################
|
|
# Whole document validation (IDs, etc.)
|
|
##########################################################################
|
|
|
|
# Whole-document checks, run once the token stream is exhausted.
#
# For every token recorded as carrying a contextmenu attribute:
#   * a non-blank value naming an ID that was never defined yields
#     "id-does-not-exist";
#   * otherwise, if the element that defines that ID is not a <menu>,
#     "contextmenu-must-point-to-menu" is yielded.
#
# NOTE(review): this reads token[:data] with Hash#fetch, i.e. it relies on the
# attribute list having been converted to a hash by the time eof runs --
# confirm against the parser.
def eof
  @things_that_point_to_an_id.each do |token|
    tag_name = token.fetch(:name, "").downcase
    attrs_dict = token[:data]
    attr_value = attrs_dict.fetch("contextmenu", "")
    # "" is truthy in Ruby, so blankness has to be tested explicitly.
    has_value = !(attr_value.nil? || attr_value.empty?)
    if has_value && !@ids_we_have_known_and_loved.include?(attr_value)
      yield({:type => "ParseError",
             :data => "id-does-not-exist",
             :datavars => {"tagName" => tag_name,
                           "attributeName" => "contextmenu",
                           "attributeValue" => attr_value}})
    else
      @things_that_define_an_id.each do |ref_token|
        id = ref_token.fetch(:data, {}).fetch("id", "")
        next if id.nil? || id.empty?
        next unless id == attr_value
        if ref_token.fetch(:name, "").downcase != "menu"
          yield({:type => "ParseError",
                 :data => "contextmenu-must-point-to-menu"})
        end
        break # only the first element defining this ID is considered
      end
    end
  end
end
|
|
end
|