Update to latest HTML5lib

Fix that Tokenizer bug for real this time.
This commit is contained in:
Jacques Distler 2007-09-09 22:26:19 -05:00
parent f3a89556c4
commit ed68d975df
53 changed files with 11569 additions and 2603 deletions

View file

@ -81,8 +81,8 @@ def print_output(parser, document, opts)
if opts.error
errList=[]
for pos, message in parser.errors
errList << ("Line %i Col %i"%pos + " " + message)
for pos, errorcode, datavars in parser.errors
errList << "Line %i Col %i"%pos + " " + constants.E.get(errorcode, 'Unknown error "%s"' % errorcode) % datavars
end
$stdout.write("\nParse errors:\n" + errList.join("\n")+"\n")
end

View file

@ -2,6 +2,8 @@ module HTML5
# Marker exception class named EOF. It derives directly from Exception
# (not StandardError), so a bare `rescue` clause will not catch it; callers
# must rescue EOF (or Exception) explicitly.
class EOF < Exception; end
# Identity helper used to mark message strings: returns +str+ unchanged.
def self._(str)
  str
end
CONTENT_MODEL_FLAGS = [
:PCDATA,
:RCDATA,
@ -815,4 +817,228 @@ module HTML5
hz-gb-2312
]
# Table of parse-error messages, keyed by error code.
#
# Every message is passed through `_()`. Placeholders are written in the
# named `%(key)` form (for example `%(name)`, `%(charAsInt)08x`).
# NOTE(review): `%(key)` is not a format directive understood by Ruby's
# String#% -- presumably the consumer substitutes these itself; confirm.
E = {
  "null-character" =>
    _("Null character in input stream, replaced with U+FFFD."),
  "incorrectly-placed-solidus" =>
    _("Solidus (/) incorrectly placed in tag."),
  "incorrect-cr-newline-entity" =>
    _("Incorrect CR newline entity, replaced with LF."),
  "illegal-windows-1252-entity" =>
    _("Entity used with illegal number (windows-1252 reference)."),
  "cant-convert-numeric-entity" =>
    _("Numeric entity couldn't be converted to character " +
      "(codepoint U+%(charAsInt)08x)."),
  "illegal-codepoint-for-numeric-entity" =>
    _("Numeric entity represents an illegal codepoint: " +
      "U+%(charAsInt)08x."),
  "numeric-entity-without-semicolon" =>
    _("Numeric entity didn't end with ';'."),
  "expected-numeric-entity-but-got-eof" =>
    _("Numeric entity expected. Got end of file instead."),
  "expected-numeric-entity" =>
    _("Numeric entity expected but none found."),
  "named-entity-without-semicolon" =>
    _("Named entity didn't end with ';'."),
  "expected-named-entity" =>
    _("Named entity expected. Got none."),
  "attributes-in-end-tag" =>
    _("End tag contains unexpected attributes."),
  "expected-tag-name-but-got-right-bracket" =>
    _("Expected tag name. Got '>' instead."),
  "expected-tag-name-but-got-question-mark" =>
    _("Expected tag name. Got '?' instead. (HTML doesn't " +
      "support processing instructions.)"),
  "expected-tag-name" =>
    _("Expected tag name. Got something else instead"),
  "expected-closing-tag-but-got-right-bracket" =>
    _("Expected closing tag. Got '>' instead. Ignoring '</>'."),
  "expected-closing-tag-but-got-eof" =>
    _("Expected closing tag. Unexpected end of file."),
  "expected-closing-tag-but-got-char" =>
    _("Expected closing tag. Unexpected character '%(data)' found."),
  "eof-in-tag-name" =>
    _("Unexpected end of file in the tag name."),
  "expected-attribute-name-but-got-eof" =>
    _("Unexpected end of file. Expected attribute name instead."),
  "eof-in-attribute-name" =>
    _("Unexpected end of file in attribute name."),
  "duplicate-attribute" =>
    _("Dropped duplicate attribute on tag."),
  "expected-end-of-tag-name-but-got-eof" =>
    _("Unexpected end of file. Expected = or end of tag."),
  "expected-attribute-value-but-got-eof" =>
    _("Unexpected end of file. Expected attribute value."),
  "eof-in-attribute-value-double-quote" =>
    _("Unexpected end of file in attribute value (\")."),
  "eof-in-attribute-value-single-quote" =>
    _("Unexpected end of file in attribute value (')."),
  "eof-in-attribute-value-no-quotes" =>
    _("Unexpected end of file in attribute value."),
  "expected-dashes-or-doctype" =>
    _("Expected '--' or 'DOCTYPE'. Not found."),
  "incorrect-comment" =>
    _("Incorrect comment."),
  "eof-in-comment" =>
    _("Unexpected end of file in comment."),
  "eof-in-comment-end-dash" =>
    _("Unexpected end of file in comment (-)"),
  "unexpected-dash-after-double-dash-in-comment" =>
    _("Unexpected '-' after '--' found in comment."),
  "eof-in-comment-double-dash" =>
    _("Unexpected end of file in comment (--)."),
  "unexpected-char-in-comment" =>
    _("Unexpected character in comment found."),
  "need-space-after-doctype" =>
    _("No space after literal string 'DOCTYPE'."),
  "expected-doctype-name-but-got-right-bracket" =>
    _("Unexpected > character. Expected DOCTYPE name."),
  "expected-doctype-name-but-got-eof" =>
    _("Unexpected end of file. Expected DOCTYPE name."),
  "eof-in-doctype-name" =>
    _("Unexpected end of file in DOCTYPE name."),
  "eof-in-doctype" =>
    _("Unexpected end of file in DOCTYPE."),
  "expected-space-or-right-bracket-in-doctype" =>
    _("Expected space or '>'. Got '%(data)'"),
  "unexpected-end-of-doctype" =>
    _("Unexpected end of DOCTYPE."),
  "unexpected-char-in-doctype" =>
    _("Unexpected character in DOCTYPE."),
  "eof-in-bogus-doctype" =>
    _("Unexpected end of file in bogus doctype."),
  "eof-in-innerhtml" =>
    _("XXX innerHTML EOF"),
  "unexpected-doctype" =>
    _("Unexpected DOCTYPE. Ignored."),
  "non-html-root" =>
    _("html needs to be the first start tag."),
  "expected-doctype-but-got-eof" =>
    _("Unexpected End of file. Expected DOCTYPE."),
  "unknown-doctype" =>
    _("Erroneous DOCTYPE."),
  "expected-doctype-but-got-chars" =>
    _("Unexpected non-space characters. Expected DOCTYPE."),
  "expected-doctype-but-got-start-tag" =>
    _("Unexpected start tag (%(name)). Expected DOCTYPE."),
  "expected-doctype-but-got-end-tag" =>
    _("Unexpected end tag (%(name)). Expected DOCTYPE."),
  "end-tag-after-implied-root" =>
    _("Unexpected end tag (%(name)) after the (implied) root element."),
  "expected-named-closing-tag-but-got-eof" =>
    _("Unexpected end of file. Expected end tag (%(name))."),
  "two-heads-are-not-better-than-one" =>
    _("Unexpected start tag head in existing head. Ignored."),
  "unexpected-end-tag" =>
    _("Unexpected end tag (%(name)). Ignored."),
  "unexpected-start-tag-out-of-my-head" =>
    _("Unexpected start tag (%(name)) that can be in head. Moved."),
  "unexpected-start-tag" =>
    _("Unexpected start tag (%(name))."),
  "missing-end-tag" =>
    _("Missing end tag (%(name))."),
  "missing-end-tags" =>
    _("Missing end tags (%(name))."),
  "unexpected-start-tag-implies-end-tag" =>
    _("Unexpected start tag (%(startName)) " +
      "implies end tag (%(endName))."),
  "unexpected-start-tag-treated-as" =>
    _("Unexpected start tag (%(originalName)). Treated as %(newName)."),
  "deprecated-tag" =>
    _("Unexpected start tag %(name). Don't use it!"),
  "unexpected-start-tag-ignored" =>
    _("Unexpected start tag %(name). Ignored."),
  "expected-one-end-tag-but-got-another" =>
    _("Unexpected end tag (%(gotName)). " +
      "Missing end tag (%(expectedName))."),
  "end-tag-too-early" =>
    _("End tag (%(name)) seen too early. Expected other end tag."),
  "end-tag-too-early-named" =>
    _("Unexpected end tag (%(gotName)). Expected end tag (%(expectedName))."),
  "end-tag-too-early-ignored" =>
    _("End tag (%(name)) seen too early. Ignored."),
  "adoption-agency-1.1" =>
    _("End tag (%(name)) violates step 1, " +
      "paragraph 1 of the adoption agency algorithm."),
  "adoption-agency-1.2" =>
    _("End tag (%(name)) violates step 1, " +
      "paragraph 2 of the adoption agency algorithm."),
  "adoption-agency-1.3" =>
    _("End tag (%(name)) violates step 1, " +
      "paragraph 3 of the adoption agency algorithm."),
  "unexpected-end-tag-treated-as" =>
    _("Unexpected end tag (%(originalName)). Treated as %(newName)."),
  "no-end-tag" =>
    _("This element (%(name)) has no end tag."),
  "unexpected-implied-end-tag-in-table" =>
    _("Unexpected implied end tag (%(name)) in the table phase."),
  "unexpected-implied-end-tag-in-table-body" =>
    _("Unexpected implied end tag (%(name)) in the table body phase."),
  "unexpected-char-implies-table-voodoo" =>
    _("Unexpected non-space characters in " +
      "table context caused voodoo mode."),
  "unexpected-start-tag-implies-table-voodoo" =>
    _("Unexpected start tag (%(name)) in " +
      "table context caused voodoo mode."),
  "unexpected-end-tag-implies-table-voodoo" =>
    _("Unexpected end tag (%(name)) in " +
      "table context caused voodoo mode."),
  "unexpected-cell-in-table-body" =>
    _("Unexpected table cell start tag (%(name)) " +
      "in the table body phase."),
  "unexpected-cell-end-tag" =>
    _("Got table cell end tag (%(name)) " +
      "while required end tags are missing."),
  "unexpected-end-tag-in-table-body" =>
    _("Unexpected end tag (%(name)) in the table body phase. Ignored."),
  "unexpected-implied-end-tag-in-table-row" =>
    _("Unexpected implied end tag (%(name)) in the table row phase."),
  "unexpected-end-tag-in-table-row" =>
    _("Unexpected end tag (%(name)) in the table row phase. Ignored."),
  "unexpected-select-in-select" =>
    _("Unexpected select start tag in the select phase " +
      "implies select start tag."),
  "unexpected-start-tag-in-select" =>
    _("Unexpected start tag token (%(name)) in the select phase. " +
      "Ignored."),
  "unexpected-end-tag-in-select" =>
    _("Unexpected end tag (%(name)) in the select phase. Ignored."),
  "unexpected-char-after-body" =>
    _("Unexpected non-space characters in the after body phase."),
  "unexpected-start-tag-after-body" =>
    _("Unexpected start tag token (%(name))" +
      " in the after body phase."),
  "unexpected-end-tag-after-body" =>
    _("Unexpected end tag token (%(name))" +
      " in the after body phase."),
  "unexpected-char-in-frameset" =>
    _("Unexpected characters in the frameset phase. Characters ignored."),
  "unexpected-start-tag-in-frameset" =>
    _("Unexpected start tag token (%(name))" +
      " in the frameset phase. Ignored."),
  "unexpected-frameset-in-frameset-innerhtml" =>
    _("Unexpected end tag token (frameset) " +
      "in the frameset phase (innerHTML)."),
  "unexpected-end-tag-in-frameset" =>
    _("Unexpected end tag token (%(name))" +
      " in the frameset phase. Ignored."),
  "unexpected-char-after-frameset" =>
    _("Unexpected non-space characters in the " +
      "after frameset phase. Ignored."),
  "unexpected-start-tag-after-frameset" =>
    _("Unexpected start tag (%(name))" +
      " in the after frameset phase. Ignored."),
  "unexpected-end-tag-after-frameset" =>
    _("Unexpected end tag (%(name))" +
      " in the after frameset phase. Ignored."),
  "expected-eof-but-got-char" =>
    _("Unexpected non-space characters. Expected end of file."),
  "expected-eof-but-got-start-tag" =>
    _("Unexpected start tag (%(name))" +
      ". Expected end of file."),
  "expected-eof-but-got-end-tag" =>
    _("Unexpected end tag (%(name))" +
      ". Expected end of file."),
}
end

View file

@ -0,0 +1,752 @@
# borrowed from feedvalidator, original copyright license is
#
# Copyright (c) 2002-2006, Sam Ruby, Mark Pilgrim, Joseph Walton, and Phil Ringnalda
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Lookup table of language codes (two-letter codes first, then three-letter
# codes) mapped to an English language name.
#
# Each key appears exactly once. Where the source list named one code several
# times, only the entry that Ruby actually kept (the last one in the literal)
# is retained, and the overwritten names are preserved in a trailing comment.
# This keeps the resulting Hash identical while avoiding Ruby's
# "key is duplicated and overwritten" warnings when the file is loaded.
#
# NOTE(review): 'gws' => 'Alemanic' looks like a transposition of 'gsw'
# (listed further down as 'Swiss German; Alemanic'); left in place because
# removing it would change which codes validate -- confirm against the
# code list this table was taken from.
ISO_LANG = {
  'aa' => 'Afar',
  'ab' => 'Abkhazian',
  'ae' => 'Avestan',
  'af' => 'Afrikaans',
  'ak' => 'Akan',
  'am' => 'Amharic',
  'an' => 'Aragonese',
  'ar' => 'Arabic',
  'as' => 'Assamese',
  'av' => 'Avaric',
  'ay' => 'Aymara',
  'az' => 'Azerbaijani',
  'ba' => 'Bashkir',
  'be' => 'Byelorussian',
  'bg' => 'Bulgarian',
  'bh' => 'Bihari',
  'bi' => 'Bislama',
  'bm' => 'Bambara',
  'bn' => 'Bengali;Bangla',
  'bo' => 'Tibetan',
  'br' => 'Breton',
  'bs' => 'Bosnian',
  'ca' => 'Catalan',
  'ce' => 'Chechen',
  'ch' => 'Chamorro',
  'co' => 'Corsican',
  'cr' => 'Cree',
  'cs' => 'Czech',
  'cu' => 'Church Slavic',
  'cv' => 'Chuvash',
  'cy' => 'Welsh',
  'da' => 'Danish',
  'de' => 'German',
  'dv' => 'Divehi',
  'dz' => 'Dzongkha',
  'ee' => 'Ewe',
  'el' => 'Greek',
  'en' => 'English',
  'eo' => 'Esperanto',
  'es' => 'Spanish',
  'et' => 'Estonian',
  'eu' => 'Basque',
  'fa' => 'Persian (Farsi)',
  'ff' => 'Fulah',
  'fi' => 'Finnish',
  'fj' => 'Fiji',
  'fo' => 'Faroese',
  'fr' => 'French',
  'fy' => 'Frisian, Western',
  'ga' => 'Irish',
  'gd' => 'Scots Gaelic',
  'gl' => 'Galician',
  'gn' => 'Guarani',
  'gu' => 'Gujarati',
  'gv' => 'Manx',
  'ha' => 'Hausa',
  'he' => 'Hebrew',
  'hi' => 'Hindi',
  'ho' => 'Hiri Motu',
  'hr' => 'Croatian',
  'ht' => 'Haitian',
  'hu' => 'Hungarian',
  'hy' => 'Armenian',
  'hz' => 'Herero',
  'ia' => 'Interlingua',
  'id' => 'Indonesian',
  'ie' => 'Interlingue',
  'ig' => 'Igbo',
  'ii' => 'Sichuan Yi',
  'ik' => 'Inupiak',
  'io' => 'Ido',
  'is' => 'Icelandic',
  'it' => 'Italian',
  'iu' => 'Inuktitut',
  'ja' => 'Japanese',
  'jv' => 'Javanese',
  'ka' => 'Georgian',
  'kg' => 'Kongo',
  'ki' => 'Kikuyu; Gikuyu',
  'kj' => 'Kuanyama; Kwanyama',
  'kk' => 'Kazakh',
  'kl' => 'Greenlandic',
  'km' => 'Cambodian',
  'kn' => 'Kannada',
  'ko' => 'Korean',
  'kr' => 'Kanuri',
  'ks' => 'Kashmiri',
  'ku' => 'Kurdish',
  'kv' => 'Komi',
  'kw' => 'Cornish',
  'ky' => 'Kirghiz',
  'la' => 'Latin',
  'lb' => 'Letzeburgesch; Luxembourgish',
  'lg' => 'Ganda',
  'li' => 'Limburgan; Limburger, Limburgish',
  'ln' => 'Lingala',
  'lo' => 'Lao',
  'lt' => 'Lithuanian',
  'lu' => 'Luba-Katanga',
  'lv' => 'Latvian',
  'mg' => 'Malagasy',
  'mh' => 'Marshallese',
  'mi' => 'Maori',
  'mk' => 'Macedonian',
  'ml' => 'Malayalam',
  'mn' => 'Mongolian',
  'mo' => 'Moldavian',
  'mr' => 'Marathi',
  'ms' => 'Malay',
  'mt' => 'Maltese',
  'my' => 'Burmese',
  'na' => 'Nauru',
  'nb' => 'Norwegian Bokmal',
  'nd' => 'Ndebele, North',
  'ne' => 'Nepali',
  'ng' => 'Ndonga',
  'nl' => 'Dutch',
  'nn' => 'Norwegian Nynorsk',
  'no' => 'Norwegian',
  'nr' => 'Ndebele, South',
  'nv' => 'Navaho; Navajo',
  'ny' => 'Chewa; Chichewa; Nyanha',
  'oc' => 'Occitan',
  'oj' => 'Ojibwa',
  'om' => 'Afan (Oromo)',
  'or' => 'Oriya',
  'os' => 'Ossetian; Ossetic',
  'pa' => 'Punjabi',
  'pi' => 'Pali',
  'pl' => 'Polish',
  'ps' => 'Pushto',
  'pt' => 'Portuguese',
  'qu' => 'Quechua',
  'rm' => 'Rhaeto-Romance',
  'rn' => 'Kurundi',
  'ro' => 'Romanian',
  'ru' => 'Russian',
  'rw' => 'Kinyarwanda',
  'sa' => 'Sanskrit',
  'sc' => 'Sardinian',
  'sd' => 'Sindhi',
  'se' => 'Northern Sami',
  'sg' => 'Sangho',
  'sh' => 'Serbo-Croatian',
  'si' => 'Singhalese',
  'sk' => 'Slovak',
  'sl' => 'Slovenian',
  'sm' => 'Samoan',
  'sn' => 'Shona',
  'so' => 'Somali',
  'sq' => 'Albanian',
  'sr' => 'Serbian',
  'ss' => 'Swati',
  'st' => 'Sotho, Southern',
  'su' => 'Sundanese',
  'sv' => 'Swedish',
  'sw' => 'Swahili',
  'ta' => 'Tamil',
  'te' => 'Telugu',
  'tg' => 'Tajik',
  'th' => 'Thai',
  'ti' => 'Tigrinya',
  'tk' => 'Turkmen',
  'tl' => 'Tagalog',
  'tn' => 'Tswana',
  'to' => 'Tonga',
  'tr' => 'Turkish',
  'ts' => 'Tsonga',
  'tt' => 'Tatar',
  'tw' => 'Twi',
  'ty' => 'Tahitian',
  'ug' => 'Uigur',
  'uk' => 'Ukrainian',
  'ur' => 'Urdu',
  'uz' => 'Uzbek',
  've' => 'Venda',
  'vi' => 'Vietnamese',
  'vo' => 'Volapuk',
  'wa' => 'Walloon',
  'wo' => 'Wolof',
  'xh' => 'Xhosa',
  'yi' => 'Yiddish',
  'yo' => 'Yoruba',
  'za' => 'Zhuang',
  'zh' => 'Chinese',
  'zu' => 'Zulu',
  'x' => 'a user-defined language',
  'xx' => 'a user-defined language',
  'abk' => 'Abkhazian',
  'ace' => 'Achinese',
  'ach' => 'Acoli',
  'ada' => 'Adangme',
  'ady' => 'Adyghe', # also listed as: Adygei
  'aar' => 'Afar',
  'afh' => 'Afrihili',
  'afr' => 'Afrikaans',
  'afa' => 'Afro-Asiatic (Other)',
  'ain' => 'Ainu',
  'aka' => 'Akan',
  'akk' => 'Akkadian',
  'alb' => 'Albanian',
  'sqi' => 'Albanian',
  'gws' => 'Alemanic',
  'ale' => 'Aleut',
  'alg' => 'Algonquian languages',
  'tut' => 'Altaic (Other)',
  'amh' => 'Amharic',
  'anp' => 'Angika',
  'apa' => 'Apache languages',
  'ara' => 'Arabic',
  'arg' => 'Aragonese',
  'arc' => 'Aramaic',
  'arp' => 'Arapaho',
  'arn' => 'Araucanian',
  'arw' => 'Arawak',
  'arm' => 'Armenian',
  'hye' => 'Armenian',
  'rup' => 'Aromanian',
  'art' => 'Artificial (Other)',
  'asm' => 'Assamese',
  'ath' => 'Athapascan languages',
  'aus' => 'Australian languages',
  'map' => 'Austronesian (Other)',
  'ava' => 'Avaric',
  'ave' => 'Avestan',
  'awa' => 'Awadhi',
  'aym' => 'Aymara',
  'aze' => 'Azerbaijani',
  'ast' => 'Bable', # also listed as: Asturian
  'ban' => 'Balinese',
  'bat' => 'Baltic (Other)',
  'bal' => 'Baluchi',
  'bam' => 'Bambara',
  'bai' => 'Bamileke languages',
  'bad' => 'Banda',
  'bnt' => 'Bantu (Other)',
  'bas' => 'Basa',
  'bak' => 'Bashkir',
  'baq' => 'Basque',
  'eus' => 'Basque',
  'btk' => 'Batak (Indonesia)',
  'bej' => 'Beja',
  'bel' => 'Belarusian',
  'bem' => 'Bemba',
  'ben' => 'Bengali',
  'ber' => 'Berber (Other)',
  'bho' => 'Bhojpuri',
  'bih' => 'Bihari',
  'bik' => 'Bikol',
  'bin' => 'Bini',
  'bis' => 'Bislama',
  'byn' => 'Blin', # also listed as: Bilin
  'bos' => 'Bosnian',
  'bra' => 'Braj',
  'bre' => 'Breton',
  'bug' => 'Buginese',
  'bul' => 'Bulgarian',
  'bua' => 'Buriat',
  'bur' => 'Burmese',
  'mya' => 'Burmese',
  'cad' => 'Caddo',
  'car' => 'Carib',
  'cau' => 'Caucasian (Other)',
  'ceb' => 'Cebuano',
  'cel' => 'Celtic (Other)',
  'cai' => 'Central American Indian (Other)',
  'chg' => 'Chagatai',
  'cmc' => 'Chamic languages',
  'cha' => 'Chamorro',
  'che' => 'Chechen',
  'chr' => 'Cherokee',
  'chy' => 'Cheyenne',
  'chb' => 'Chibcha',
  'chi' => 'Chinese',
  'zho' => 'Chinese',
  'chn' => 'Chinook jargon',
  'chp' => 'Chipewyan',
  'cho' => 'Choctaw',
  'chu' => 'Church Slavic; Church Slavonic; Old Church Slavonic; Old Church Slavic; Old Bulgarian',
  'chk' => 'Chuukese',
  'chv' => 'Chuvash',
  'nwc' => 'Classical Nepal Bhasa; Classical Newari; Old Newari',
  'cop' => 'Coptic',
  'cor' => 'Cornish',
  'cos' => 'Corsican',
  'cre' => 'Cree',
  'mus' => 'Creek',
  'crp' => 'Creoles and pidgins(Other)',
  'cpe' => 'Creoles and pidgins, English-based (Other)',
  'cpf' => 'Creoles and pidgins, French-based (Other)',
  'cpp' => 'Creoles and pidgins, Portuguese-based (Other)',
  'crh' => 'Crimean Tatar; Crimean Turkish',
  'scr' => 'Croatian',
  'hrv' => 'Croatian',
  'cus' => 'Cushitic (Other)',
  'cze' => 'Czech',
  'ces' => 'Czech',
  'dak' => 'Dakota',
  'dan' => 'Danish',
  'dar' => 'Dargwa',
  'day' => 'Dayak',
  'del' => 'Delaware',
  'din' => 'Dinka',
  'div' => 'Divehi',
  'doi' => 'Dogri',
  'dgr' => 'Dogrib',
  'dra' => 'Dravidian (Other)',
  'dua' => 'Duala',
  'dut' => 'Dutch',
  'nld' => 'Dutch',
  'dum' => 'Dutch, Middle (ca. 1050-1350)',
  'dyu' => 'Dyula',
  'dzo' => 'Dzongkha',
  'efi' => 'Efik',
  'egy' => 'Egyptian (Ancient)',
  'eka' => 'Ekajuk',
  'elx' => 'Elamite',
  'eng' => 'English',
  'enm' => 'English, Middle (1100-1500)',
  'ang' => 'English, Old (ca.450-1100)',
  'myv' => 'Erzya',
  'epo' => 'Esperanto',
  'est' => 'Estonian',
  'ewe' => 'Ewe',
  'ewo' => 'Ewondo',
  'fan' => 'Fang',
  'fat' => 'Fanti',
  'fao' => 'Faroese',
  'fij' => 'Fijian',
  'fil' => 'Filipino; Pilipino',
  'fin' => 'Finnish',
  'fiu' => 'Finno-Ugrian (Other)',
  'fon' => 'Fon',
  'fre' => 'French',
  'fra' => 'French',
  'frm' => 'French, Middle (ca.1400-1600)',
  'fro' => 'French, Old (842-ca.1400)',
  'frs' => 'Frisian, Eastern',
  'fur' => 'Friulian',
  'ful' => 'Fulah',
  'gaa' => 'Ga',
  'glg' => 'Gallegan',
  'lug' => 'Ganda',
  'gay' => 'Gayo',
  'gba' => 'Gbaya',
  'gez' => 'Geez',
  'geo' => 'Georgian',
  'kat' => 'Georgian',
  'ger' => 'German',
  'deu' => 'German',
  'gmh' => 'German, Middle High (ca.1050-1500)',
  'goh' => 'German, Old High (ca.750-1050)',
  'gem' => 'Germanic (Other)',
  'gil' => 'Gilbertese',
  'gon' => 'Gondi',
  'gor' => 'Gorontalo',
  'got' => 'Gothic',
  'grb' => 'Grebo',
  'grc' => 'Greek, Ancient (to 1453)',
  'gre' => 'Greek, Modern (1453-)',
  'ell' => 'Greek, Modern (1453-)',
  'grn' => 'Guarani',
  'guj' => 'Gujarati',
  'gwi' => 'Gwich\'in',
  'hai' => 'Haida',
  'hat' => 'Haitian',
  'hau' => 'Hausa',
  'haw' => 'Hawaiian',
  'heb' => 'Hebrew',
  'her' => 'Herero',
  'hil' => 'Hiligaynon',
  'him' => 'Himachali',
  'hin' => 'Hindi',
  'hmo' => 'Hiri Motu',
  'hit' => 'Hittite',
  'hmn' => 'Hmong',
  'hun' => 'Hungarian',
  'hup' => 'Hupa',
  'iba' => 'Iban',
  'ice' => 'Icelandic',
  'isl' => 'Icelandic',
  'ido' => 'Ido',
  'ibo' => 'Igbo',
  'ijo' => 'Ijo',
  'ilo' => 'Iloko',
  'smn' => 'Inari Sami',
  'inc' => 'Indic (Other)',
  'ine' => 'Indo-European (Other)',
  'ind' => 'Indonesian',
  'inh' => 'Ingush',
  'ina' => 'Interlingua (International Auxiliary Language Association)',
  'ile' => 'Interlingue',
  'iku' => 'Inuktitut',
  'ipk' => 'Inupiaq',
  'ira' => 'Iranian (Other)',
  'gle' => 'Irish',
  'mga' => 'Irish, Middle (900-1200)',
  'sga' => 'Irish, Old (to 900)',
  'iro' => 'Iroquoian languages',
  'ita' => 'Italian',
  'jpn' => 'Japanese',
  'jav' => 'Javanese',
  'jrb' => 'Judeo-Arabic',
  'jpr' => 'Judeo-Persian',
  'kbd' => 'Kabardian',
  'kab' => 'Kabyle',
  'kac' => 'Kachin',
  'kal' => 'Kalaallisut', # also listed as: Greenlandic; Kalaallisut
  'xal' => 'Kalmyk',
  'kam' => 'Kamba',
  'kan' => 'Kannada',
  'kau' => 'Kanuri',
  'krc' => 'Karachay-Balkar',
  'kaa' => 'Kara-Kalpak',
  'krl' => 'Karelian',
  'kar' => 'Karen',
  'kas' => 'Kashmiri',
  'csb' => 'Kashubian',
  'kaw' => 'Kawi',
  'kaz' => 'Kazakh',
  'kha' => 'Khasi',
  'khm' => 'Khmer',
  'khi' => 'Khoisan (Other)',
  'kho' => 'Khotanese',
  'kik' => 'Kikuyu', # also listed as: Gikuyu
  'kmb' => 'Kimbundu',
  'kin' => 'Kinyarwanda',
  'kir' => 'Kirghiz',
  'kom' => 'Komi',
  'kon' => 'Kongo',
  'kok' => 'Konkani',
  'kor' => 'Korean',
  'kos' => 'Kosraean',
  'kpe' => 'Kpelle',
  'kro' => 'Kru',
  'kum' => 'Kumyk',
  'kur' => 'Kurdish',
  'kru' => 'Kurukh',
  'kut' => 'Kutenai',
  'kua' => 'Kwanyama', # also listed as: Kuanyama
  'lad' => 'Ladino',
  'lah' => 'Lahnda',
  'lam' => 'Lamba',
  'lao' => 'Lao',
  'lat' => 'Latin',
  'lav' => 'Latvian',
  'lez' => 'Lezghian',
  'lim' => 'Limburgan',
  'lin' => 'Lingala',
  'lit' => 'Lithuanian',
  'jbo' => 'Lojban',
  'dsb' => 'Lower Sorbian',
  'loz' => 'Lozi',
  'lub' => 'Luba-Katanga',
  'lua' => 'Luba-Lulua',
  'lui' => 'Luiseno',
  'smj' => 'Lule Sami',
  'lun' => 'Lunda',
  'luo' => 'Luo (Kenya and Tanzania)',
  'lus' => 'Lushai',
  'ltz' => 'Luxembourgish', # also listed as: Letzeburgesch
  'mac' => 'Macedonian',
  'mkd' => 'Macedonian',
  'mad' => 'Madurese',
  'mag' => 'Magahi',
  'mai' => 'Maithili',
  'mak' => 'Makasar',
  'mlg' => 'Malagasy',
  'may' => 'Malay',
  'msa' => 'Malay',
  'mal' => 'Malayalam',
  'mlt' => 'Maltese',
  'mnc' => 'Manchu',
  'mdr' => 'Mandar',
  'man' => 'Mandingo',
  'mni' => 'Manipuri',
  'mno' => 'Manobo languages',
  'glv' => 'Manx',
  'mao' => 'Maori',
  'mri' => 'Maori',
  'mar' => 'Marathi',
  'chm' => 'Mari',
  'mah' => 'Marshallese',
  'mwr' => 'Marwari',
  'mas' => 'Masai',
  'myn' => 'Mayan languages',
  'men' => 'Mende',
  'mic' => 'Micmac',
  'min' => 'Minangkabau',
  'mwl' => 'Mirandese',
  'mis' => 'Miscellaneous languages',
  'moh' => 'Mohawk',
  'mdf' => 'Moksha',
  'mol' => 'Moldavian',
  'mkh' => 'Mon-Khmer (Other)',
  'lol' => 'Mongo',
  'mon' => 'Mongolian',
  'mos' => 'Mossi',
  'mul' => 'Multiple languages',
  'mun' => 'Munda languages',
  'nah' => 'Nahuatl',
  'nau' => 'Nauru',
  'nav' => 'Navaho; Navajo',
  'ndo' => 'Ndonga',
  'nap' => 'Neapolitan',
  'nep' => 'Nepali',
  'new' => 'Newari',
  'nia' => 'Nias',
  'nic' => 'Niger-Kordofanian (Other)',
  'ssa' => 'Nilo-Saharan (Other)',
  'niu' => 'Niuean',
  'nog' => 'Nogai',
  'non' => 'Norse, Old',
  'nai' => 'North American Indian (Other)',
  'frr' => 'Northern Frisian',
  'sme' => 'Northern Sami',
  'nde' => 'North Ndebele', # also listed as: Ndebele, North
  'nor' => 'Norwegian',
  'nob' => 'Norwegian Bokmal', # also listed as: Bokmal, Norwegian
  'nub' => 'Nubian languages',
  'nym' => 'Nyamwezi',
  'nya' => 'Nyanja', # also listed as: Chewa, Chichewa
  'nyn' => 'Nyankole',
  'nno' => 'Nynorsk, Norwegian', # also listed as: Norwegian Nynorsk
  'nyo' => 'Nyoro',
  'nzi' => 'Nzima',
  'oji' => 'Ojibwa',
  'ori' => 'Oriya',
  'orm' => 'Oromo',
  'osa' => 'Osage',
  'oss' => 'Ossetian; Ossetic',
  'oto' => 'Otomian languages',
  'pal' => 'Pahlavi',
  'pau' => 'Palauan',
  'pli' => 'Pali',
  'pam' => 'Pampanga',
  'pag' => 'Pangasinan',
  'pap' => 'Papiamento',
  'paa' => 'Papuan (Other)',
  'per' => 'Persian',
  'fas' => 'Persian',
  'peo' => 'Persian, Old (ca.600-400)',
  'phi' => 'Philippine (Other)',
  'phn' => 'Phoenician',
  'pon' => 'Pohnpeian',
  'pol' => 'Polish',
  'por' => 'Portuguese',
  'pra' => 'Prakrit languages',
  'oci' => 'Provencal', # also listed as: Occitan (post 1500)
  'pro' => 'Provencal, Old (to 1500)',
  'pan' => 'Punjabi', # also listed as: Panjabi
  'pus' => 'Pushto',
  'que' => 'Quechua',
  'roh' => 'Raeto-Romance',
  'raj' => 'Rajasthani',
  'rap' => 'Rapanui',
  'rar' => 'Rarotongan',
  'qaa' => 'Reserved for local use',
  'qtz' => 'Reserved for local use',
  'roa' => 'Romance (Other)',
  'rum' => 'Romanian',
  'ron' => 'Romanian',
  'rom' => 'Romany',
  'run' => 'Rundi',
  'rus' => 'Russian',
  'sal' => 'Salishan languages',
  'sam' => 'Samaritan Aramaic',
  'smi' => 'Sami languages (Other)',
  'smo' => 'Samoan',
  'sad' => 'Sandawe',
  'sag' => 'Sango',
  'san' => 'Sanskrit',
  'sat' => 'Santali',
  'srd' => 'Sardinian',
  'sas' => 'Sasak',
  'nds' => 'Saxon, Low', # also listed as: German, Low / Low German
  'sco' => 'Scots',
  'gla' => 'Scottish Gaelic', # also listed as: Gaelic
  'sel' => 'Selkup',
  'sem' => 'Semitic (Other)',
  'scc' => 'Serbian',
  'srp' => 'Serbian',
  'srr' => 'Serer',
  'shn' => 'Shan',
  'sna' => 'Shona',
  'iii' => 'Sichuan Yi',
  'scn' => 'Sicilian',
  'sid' => 'Sidamo',
  'sgn' => 'Sign languages',
  'bla' => 'Siksika',
  'snd' => 'Sindhi',
  'sin' => 'Sinhalese',
  'sit' => 'Sino-Tibetan (Other)',
  'sio' => 'Siouan languages',
  'sms' => 'Skolt Sami',
  'den' => 'Slave (Athapascan)',
  'sla' => 'Slavic (Other)',
  'slo' => 'Slovak',
  'slk' => 'Slovak',
  'slv' => 'Slovenian',
  'sog' => 'Sogdian',
  'som' => 'Somali',
  'son' => 'Songhai',
  'snk' => 'Soninke',
  'wen' => 'Sorbian languages',
  'nso' => 'Sotho, Northern', # also listed as: Northern Sotho; Pedi; Sepedi
  'sot' => 'Sotho, Southern',
  'sai' => 'South American Indian (Other)',
  'alt' => 'Southern Altai',
  'sma' => 'Southern Sami',
  'nbl' => 'South Ndebele', # also listed as: Ndebele, South
  'spa' => 'Spanish', # also listed as: Castilian
  'srn' => 'Sranan Tongo',
  'suk' => 'Sukuma',
  'sux' => 'Sumerian',
  'sun' => 'Sundanese',
  'sus' => 'Susu',
  'swa' => 'Swahili',
  'ssw' => 'Swati',
  'swe' => 'Swedish',
  'gsw' => 'Swiss German; Alemanic',
  'syr' => 'Syriac',
  'tgl' => 'Tagalog',
  'tah' => 'Tahitian',
  'tai' => 'Tai (Other)',
  'tgk' => 'Tajik',
  'tmh' => 'Tamashek',
  'tam' => 'Tamil',
  'tat' => 'Tatar',
  'tel' => 'Telugu',
  'ter' => 'Tereno',
  'tet' => 'Tetum',
  'tha' => 'Thai',
  'tib' => 'Tibetan',
  'bod' => 'Tibetan',
  'tig' => 'Tigre',
  'tir' => 'Tigrinya',
  'tem' => 'Timne',
  'tiv' => 'Tiv',
  'tlh' => 'tlhIngan-Hol; Klingon', # also listed as: Klingon; tlhIngan-Hol
  'tli' => 'Tlingit',
  'tpi' => 'Tok Pisin',
  'tkl' => 'Tokelau',
  'tog' => 'Tonga (Nyasa)',
  'ton' => 'Tonga (Tonga Islands)',
  'tsi' => 'Tsimshian',
  'tso' => 'Tsonga',
  'tsn' => 'Tswana',
  'tum' => 'Tumbuka',
  'tup' => 'Tupi languages',
  'tur' => 'Turkish',
  'ota' => 'Turkish, Ottoman (1500-1928)',
  'tuk' => 'Turkmen',
  'tvl' => 'Tuvalu',
  'tyv' => 'Tuvinian',
  'twi' => 'Twi',
  'udm' => 'Udmurt',
  'uga' => 'Ugaritic',
  'uig' => 'Uighur',
  'ukr' => 'Ukrainian',
  'umb' => 'Umbundu',
  'und' => 'Undetermined',
  'hsb' => 'Upper Sorbian',
  'urd' => 'Urdu',
  'uzb' => 'Uzbek',
  'vai' => 'Vai',
  'cat' => 'Valencian', # also listed as: Catalan
  'ven' => 'Venda',
  'vie' => 'Vietnamese',
  'vol' => 'Volapuk',
  'vot' => 'Votic',
  'wak' => 'Wakashan languages',
  'wal' => 'Walamo',
  'wln' => 'Walloon',
  'war' => 'Waray',
  'was' => 'Washo',
  'wel' => 'Welsh',
  'cym' => 'Welsh',
  'fry' => 'Wester Frisian', # also listed as: Frisian, Western
  'wol' => 'Wolof',
  'xho' => 'Xhosa',
  'sah' => 'Yakut',
  'yao' => 'Yao',
  'yap' => 'Yapese',
  'yid' => 'Yiddish',
  'yor' => 'Yoruba',
  'ypk' => 'Yupik languages',
  'znd' => 'Zande',
  'zap' => 'Zapotec',
  'zen' => 'Zenaga',
  'zha' => 'Zhuang', # also listed as: Chuang
  'zul' => 'Zulu',
  'zun' => 'Zuni'
}
# Reports whether the primary subtag of a language code is listed in ISO_LANG.
#
# Only the text before the first '-' is looked up (so a code of the form
# "xx-YY" is checked by "xx" alone), and the lookup is case-insensitive.
#
# value:: the language code string to check.
# Returns true when the primary subtag is a key of ISO_LANG, false otherwise.
def is_valid_lang_code(value)
  primary = value.partition('-').first
  ISO_LANG[primary.downcase] ? true : false
end

View file

@ -0,0 +1,30 @@
# adapted from feedvalidator, original copyright license is
#
# Copyright (c) 2002-2006, Sam Ruby, Mark Pilgrim, Joseph Walton, and Phil Ringnalda
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Pattern for a complete media type: "type/subtype" followed by any number of
# ";name=value" parameters, where a value is either a bare token or a
# double-quoted string. A token is a run of characters other than whitespace
# and ()<>,;:\"/[]?= . Anchored with \A and \z so the whole string must match.
MIME_RE = %r{\A[^\s()<>,;:\\"/\[\]?=]+/[^\s()<>,;:\\"/\[\]?=]+(\s*;\s*[^\s()<>,;:\\"/\[\]?=]+=("(\\"|[^"])*"|[^\s()<>,;:\\"/\[\]?=]+))*\z}

# Reports whether +value+ is syntactically a media (MIME) type.
#
# Previously this method accepted every value because its pattern was
# commented out; it now applies MIME_RE.
#
# value:: the candidate media type; converted with #to_s before matching, so
#         nil is treated as the empty string (and rejected).
# Returns true when the whole string matches MIME_RE, false otherwise.
def is_valid_mime_type(value)
  !MIME_RE.match(value.to_s).nil?
end

View file

@ -0,0 +1,89 @@
# adapted from feedvalidator, original copyright license is
#
# Copyright (c) 2002-2006, Sam Ruby, Mark Pilgrim, Joseph Walton, and Phil Ringnalda
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# URI scheme names taken from the IANA registry page referenced below.
iana_schemes = [ # http://www.iana.org/assignments/uri-schemes.html
  "ftp", "http", "gopher", "mailto", "news", "nntp", "telnet", "wais",
  "file", "prospero", "z39.50s", "z39.50r", "cid", "mid", "vemmi",
  "service", "imap", "nfs", "acap", "rtsp", "tip", "pop", "data", "dav",
  "opaquelocktoken", "sip", "sips", "tel", "fax", "modem", "ldap",
  "https", "soap.beep", "soap.beeps", "xmlrpc.beep", "xmlrpc.beeps",
  "urn", "go", "h323", "ipp", "tftp", "mupdate", "pres", "im", "mtqp",
  "iris.beep", "dict", "snmp", "crid", "tag", "dns", "info"
]

# Schemes accepted by the URI validator: the IANA list plus 'javascript'.
ALLOWED_SCHEMES = iana_schemes + ['javascript']

# Default URI pattern: optional "scheme:", up to two slashes, then any run of
# URI characters.
RFC2396 = Regexp.new("^([a-zA-Z][0-9a-zA-Z+\\-\\.]*:)?/{0,2}[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]*$", Regexp::MULTILINE)

# Stricter pattern that requires a scheme. This is a top-level local
# variable, so it is NOT visible inside method bodies defined with `def`.
rfc2396_full = Regexp.new("[a-zA-Z][0-9a-zA-Z+\\-\\.]*:(//)?[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]+$")

# The two patterns below are regexp literals rather than double-quoted
# strings: inside a double-quoted string Ruby turns "\d" into a plain "d" and
# "\-" into "-", which previously made TAG demand the literal text "dddd" for
# the year and turned ".-_" inside its character classes into a range.
# \A and \z anchor the whole string (^ and $ would match at any line break).

# urn:<namespace id>:<namespace-specific string>
URN = %r{\A[Uu][Rr][Nn]:[a-zA-Z0-9][a-zA-Z0-9-]{1,31}:([a-zA-Z0-9()+,.:=@;$_!*'\-]|%[0-9A-Fa-f]{2})+\z}

# tag:<authority>,<yyyy[-mm[-dd]]>:<specific>[#<fragment>]
TAG = %r{\Atag:([a-z0-9\-._]+?@)?[a-z0-9.\-]+?,\d{4}(-\d{2}(-\d{2})?)?:[0-9a-zA-Z;/?:@&=+$.\-_!~*'()%,]*(#[0-9a-zA-Z;/?:@&=+$.\-_!~*'()%,]*)?\z}
# Validates +value+ as a URI.
#
# value::       the string to check.
# uri_pattern:: Regexp that a generic (non tag:/urn:) URI must match in full;
#               defaults to RFC2396.
#
# Returns a two-element array [ok, code]: [true, ""] when the value is
# accepted, otherwise [false, code] with one of "invalid-tag-uri",
# "invalid-urn", "invalid-uri-char", "uri-not-iri", "invalid-uri",
# "invalid-http-or-ftp-uri" or "invalid-scheme".
def is_valid_uri(value, uri_pattern = RFC2396)
  # split(':') returns [] for "" and for colon-only strings such as ":", so
  # normalise a missing scheme to "" (the nil used to crash scheme.match).
  scheme = value.split(':').first.to_s.downcase

  if scheme == 'tag'
    return false, "invalid-tag-uri" unless TAG.match(value)
  elsif scheme == "urn"
    return false, "invalid-urn" unless URN.match(value)
  else
    match = uri_pattern.match(value)
    # Accepted only when the pattern matched something non-empty AND the
    # match spans the entire value.
    whole_match = match.to_a.compact.any? { |part| part != '' } && match[0] == value

    if !whole_match
      if value.length > 0
        uri_char = %r{\A[0-9a-zA-Z;/?:@&=+$.\-_!~*'()%,#]\z}
        value.each_byte do |b|
          # Only ASCII bytes are checked; bytes >= 128 are skipped.
          return false, "invalid-uri-char" if b < 128 && !uri_char.match(b.chr)
        end
        # NOTE(review): a non-empty value that failed the pattern but contains
        # no illegal ASCII byte falls through to the final `return true`.
        # Behaviour kept as-is; confirm whether "invalid-uri" was intended.
      else
        begin
          return false, "uri-not-iri" if uri_pattern.match(value.encode('idna'))
        rescue
          # Best effort: if the 'idna' conversion is unavailable or fails,
          # fall back to the generic error below.
        end
        return false, "invalid-uri"
      end
    elsif ['http', 'ftp'].include?(scheme)
      return false, "invalid-http-or-ftp-uri" unless value.match(%r{^\w+://[^/].*})
    elsif value.index(':') && scheme.match(/^[a-z]+$/) && !ALLOWED_SCHEMES.include?(scheme)
      return false, "invalid-scheme"
    end
  end

  return true, ""
end
# Validates +value+ as an IRI by handing it to is_valid_uri.
#
# A non-empty value is first converted with value.encode('idna'); if that
# conversion (or the length check) raises, the error is swallowed and the
# original value is validated instead.
#
# Returns whatever is_valid_uri returns for the resulting string.
def is_valid_iri(value)
  candidate =
    begin
      value.length > 0 ? value.encode('idna') : value
    rescue
      value
    end
  is_valid_uri(candidate)
end
# Like is_valid_uri, but the value must carry an explicit scheme.
# The pattern is built here because a top-level local variable is not
# visible inside a method body (referencing one raises NameError).
# Returns the [valid, error_code] pair from is_valid_uri.
def is_valid_fully_qualified_uri(value)
  full_pattern = Regexp.new("[a-zA-Z][0-9a-zA-Z+\\-\\.]*:(//)?[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]+$")
  is_valid_uri(value, full_pattern)
end

View file

@ -0,0 +1,830 @@
# HTML 5 conformance checker
#
# Warning: this module is experimental, incomplete, and subject to removal at any time.
#
# Usage (Python html5lib session; this example has not been translated to Ruby):
# >>> from html5lib.html5parser import HTMLParser
# >>> from html5lib.filters.validator import HTMLConformanceChecker
# >>> p = HTMLParser(tokenizer=HTMLConformanceChecker)
# >>> p.parse('<!doctype html>\n<html foo=bar></html>')
# <<class 'html5lib.treebuilders.simpletree.Document'> None>
# >>> p.errors
# [((2, 14), 'unknown-attribute', {'attributeName': u'foo', 'tagName': u'html'})]
require 'html5/constants'
require 'html5/filters/base'
require 'html5/filters/iso639codes'
require 'html5/filters/rfc3987'
require 'html5/filters/rfc2046'
# Identity pass-through used to wrap message strings; returns +str+ unchanged.
# (Presumably a gettext-style hook for later localisation -- confirm.)
def _(str)
  str
end
class String
  # Snake-cases the receiver (adapted from Rails): "::" becomes "/", word
  # boundaries in CamelCase get "_", dashes become "_", and the result is
  # lower-cased. Example: "StartTag" => "start_tag".
  def underscore
    snake = gsub(/::/, '/')
    snake = snake.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
    snake = snake.gsub(/([a-z\d])([A-Z])/, '\1_\2')
    snake.tr("-", "_").downcase
  end
end
# Registers the conformance checker's error codes and their message templates
# in the shared HTML5::E table. Placeholders are written %(key) and name the
# keys of the :datavars hash attached to the corresponding ParseError token.
# NOTE(review): how the placeholders get substituted is not visible here.
HTML5::E.update({
  "unknown-start-tag" =>
    _("Unknown start tag <%(tagName)>."),
  "unknown-attribute" =>
    _("Unknown '%(attributeName)' attribute on <%(tagName)>."),
  "missing-required-attribute" =>
    _("The '%(attributeName)' attribute is required on <%(tagName)>."),
  "unknown-input-type" =>
    _("Illegal value for attribute on <input type='%(inputType)'>."),
  "attribute-not-allowed-on-this-input-type" =>
    _("The '%(attributeName)' attribute is not allowed on <input type=%(inputType)>."),
  "deprecated-attribute" =>
    _("This attribute is deprecated: '%(attributeName)' attribute on <%(tagName)>."),
  "duplicate-value-in-token-list" =>
    _("Duplicate value in token list: '%(attributeValue)' in '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-attribute-value" =>
    _("Invalid attribute value: '%(attributeName)' attribute on <%(tagName)>."),
  "space-in-id" =>
    _("Whitespace is not allowed here: '%(attributeName)' attribute on <%(tagName)>."),
  "duplicate-id" =>
    _("This ID was already defined earlier: 'id' attribute on <%(tagName)>."),
  "attribute-value-can-not-be-blank" =>
    _("This value can not be blank: '%(attributeName)' attribute on <%(tagName)>."),
  "id-does-not-exist" =>
    _("This value refers to a non-existent ID: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-enumerated-value" =>
    _("Value must be one of %(enumeratedValues): '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-boolean-value" =>
    _("Value must be one of %(enumeratedValues): '%(attributeName)' attribute on <%(tagName)>."),
  "contextmenu-must-point-to-menu" =>
    _("The contextmenu attribute must point to an ID defined on a <menu> element."),
  "invalid-lang-code" =>
    _("Invalid language code: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-integer-value" =>
    _("Value must be an integer: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-root-namespace" =>
    _("Root namespace must be 'http://www.w3.org/1999/xhtml', or omitted."),
  "invalid-browsing-context" =>
    _("Value must be one of ('_self', '_parent', '_top'), or a name that does not start with '_': '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-tag-uri" =>
    _("Invalid URI: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-urn" =>
    _("Invalid URN: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-uri-char" =>
    _("Illegal character in URI: '%(attributeName)' attribute on <%(tagName)>."),
  "uri-not-iri" =>
    _("Expected a URI but found an IRI: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-uri" =>
    _("Invalid URI: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-http-or-ftp-uri" =>
    _("Invalid URI: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-scheme" =>
    _("Unregistered URI scheme: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-rel" =>
    _("Invalid link relation: '%(attributeName)' attribute on <%(tagName)>."),
  "invalid-mime-type" =>
    _("Invalid MIME type: '%(attributeName)' attribute on <%(tagName)>."),
})
class HTMLConformanceChecker < HTML5::Filters::Base
# Attribute names accepted on every element; check_start_tag_unknown_attributes
# unions this list with the per-tag entry of @@allowed_attribute_map.
@@global_attributes = %w[class contenteditable contextmenu dir
draggable id irrelevant lang ref tabindex template
title onabort onbeforeunload onblur onchange onclick
oncontextmenu ondblclick ondrag ondragend ondragenter
ondragleave ondragover ondragstart ondrop onerror
onfocus onkeydown onkeypress onkeyup onload onmessage
onmousedown onmousemove onmouseout onmouseover onmouseup
onmousewheel onresize onscroll onselect onsubmit onunload]
# XXX lang in HTML only, xml:lang in XHTML only
# XXX validate ref, template
# Tag name => element-specific attribute names allowed in addition to
# @@global_attributes. The keys double as the set of recognised tag names
# (see check_unknown_start_tag).
@@allowed_attribute_map = {
'html' => %w[xmlns],
'head' => [],
'title' => [],
'base' => %w[href target],
'link' => %w[href rel media hreflang type],
'meta' => %w[name http-equiv content charset], # XXX charset in HTML only
'style' => %w[media type scoped],
'body' => [],
'section' => [],
'nav' => [],
'article' => [],
'blockquote' => %w[cite],
'aside' => [],
'h1' => [],
'h2' => [],
'h3' => [],
'h4' => [],
'h5' => [],
'h6' => [],
'header' => [],
'footer' => [],
'address' => [],
'p' => [],
'hr' => [],
'br' => [],
'dialog' => [],
'pre' => [],
'ol' => %w[start],
'ul' => [],
'li' => %w[value], # XXX depends on parent
'dl' => [],
'dt' => [],
'dd' => [],
'a' => %w[href target ping rel media hreflang type],
'q' => %w[cite],
'cite' => [],
'em' => [],
'strong' => [],
'small' => [],
'm' => [],
'dfn' => [],
'abbr' => [],
'time' => %w[datetime],
'meter' => %w[value min low high max optimum],
'progress' => %w[value max],
'code' => [],
'var' => [],
'samp' => [],
'kbd' => [],
'sup' => [],
'sub' => [],
'span' => [],
'i' => [],
'b' => [],
'bdo' => [],
'ins' => %w[cite datetime],
'del' => %w[cite datetime],
'figure' => [],
'img' => %w[alt src usemap ismap height width], # XXX ismap depends on parent
'iframe' => %w[src],
# <embed> handled separately
'object' => %w[data type usemap height width],
'param' => %w[name value],
'video' => %w[src autoplay start loopstart loopend end loopcount controls],
'audio' => %w[src autoplay start loopstart loopend end loopcount controls],
'source' => %w[src type media],
'canvas' => %w[height width],
'map' => [],
'area' => %w[alt coords shape href target ping rel media hreflang type],
'table' => [],
'caption' => [],
'colgroup' => %w[span], # XXX only if element contains no <col> elements
'col' => %w[span],
'tbody' => [],
'thead' => [],
'tfoot' => [],
'tr' => [],
'td' => %w[colspan rowspan],
'th' => %w[colspan rowspan scope],
# all possible <input> attributes are listed here but <input> is really handled separately
'input' => %w[accept accesskey action alt autocomplete autofocus checked
disabled enctype form inputmode list maxlength method min
max name pattern step readonly replace required size src
tabindex target template value
],
'form' => %w[action method enctype accept name onsubmit onreset accept-charset
data replace
],
'button' => %w[action enctype method replace template name value type disabled form autofocus], # XXX may need matrix of acceptable attributes based on value of type attribute (like input)
'select' => %w[name size multiple disabled data accesskey form autofocus],
'optgroup' => %w[disabled label],
'option' => %w[selected disabled label value],
'textarea' => %w[maxlength name rows cols disabled readonly required form autofocus wrap accept],
'label' => %w[for accesskey form],
'fieldset' => %w[disabled form],
'output' => %w[form name for onforminput onformchange],
'datalist' => %w[data],
# XXX repetition model for repeating form controls
'script' => %w[src defer async type],
'noscript' => [],
'noembed' => [],
'event-source' => %w[src],
'details' => %w[open],
'datagrid' => %w[multiple disabled],
'command' => %w[type label icon hidden disabled checked radiogroup default],
'menu' => %w[type label autosubmit],
'datatemplate' => [],
'rule' => [],
'nest' => [],
'legend' => [],
'div' => [],
'font' => %w[style]
}
# Tag name => attribute names that must be present on its start tag
# (consulted by check_start_tag_required_attributes).
@@required_attribute_map = {
'link' => %w[href rel],
'bdo' => %w[dir],
'img' => %w[src],
'embed' => %w[src],
'object' => [], # XXX one of 'data' or 'type' is required
'param' => %w[name value],
'source' => %w[src],
'map' => %w[id]
}
# <input type=...> value => attribute names permitted for that type.
# The keys are also the set of recognised input types (validate_start_tag_input).
@@input_type_allowed_attribute_map = {
'text' => %w[accesskey autocomplete autofocus disabled form inputmode list maxlength name pattern readonly required size tabindex value],
'password' => %w[accesskey autocomplete autofocus disabled form inputmode maxlength name pattern readonly required size tabindex value],
'checkbox' => %w[accesskey autofocus checked disabled form name required tabindex value],
'radio' => %w[accesskey autofocus checked disabled form name required tabindex value],
'button' => %w[accesskey autofocus disabled form name tabindex value],
'submit' => %w[accesskey action autofocus disabled enctype form method name replace tabindex target value],
'reset' => %w[accesskey autofocus disabled form name tabindex value],
'add' => %w[accesskey autofocus disabled form name tabindex template value],
'remove' => %w[accesskey autofocus disabled form name tabindex value],
'move-up' => %w[accesskey autofocus disabled form name tabindex value],
'move-down' => %w[accesskey autofocus disabled form name tabindex value],
'file' => %w[accept accesskey autofocus disabled form min max name required tabindex],
'hidden' => %w[disabled form name value],
'image' => %w[accesskey action alt autofocus disabled enctype form method name replace src tabindex target],
'datetime' => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
'datetime-local' => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
'date' => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
'month' => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
'week' => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
'time' => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
'number' => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
'range' => %w[accesskey autocomplete autofocus disabled form list min max name step readonly required tabindex value],
'email' => %w[accesskey autocomplete autofocus disabled form inputmode list maxlength name pattern readonly required tabindex value],
'url' => %w[accesskey autocomplete autofocus disabled form inputmode list maxlength name pattern readonly required tabindex value],
}
# <input type=...> value => attribute names reported as "deprecated-attribute"
# for that type (validate_start_tag_input).
@@input_type_deprecated_attribute_map = {
'text' => ['size'],
'password' => ['size']
}
# Allowed rel tokens: check_link_relation uses the first list when the tag is
# 'link' and the second list for every other tag.
@@link_rel_values = %w[alternate archive archives author contact feed first begin start help icon index top contents toc last end license copyright next pingback prefetch prev previous search stylesheet sidebar tag up]
@@a_rel_values = %w[alternate archive archives author contact feed first begin start help index top contents toc last end license copyright next prev previous search sidebar tag up bookmark external nofollow]
# Builds an HTML5::HTMLTokenizer over +stream+ (extra +args+ are forwarded),
# hands it to the filter base class, and resets the ID bookkeeping lists
# used for the whole-document checks in eof.
def initialize(stream, *args)
  tokenizer = HTML5::HTMLTokenizer.new(stream, *args)
  super(tokenizer)
  @things_that_define_an_id = []
  @things_that_point_to_an_id = []
  @ids_we_have_known_and_loved = []
end
# Iterates over the wrapped token stream. For every token, the most specific
# validator this object responds to ("validate_<type>_<name>", else
# "validate_<type>") runs first and its ParseError tokens are yielded; then
# the token itself is yielded. After the last token the whole-document
# checks in eof are run and their errors yielded.
def each
  __getobj__.each do |token|
    type_part = token.fetch(:type, '-').to_s.underscore
    name_part = token.fetch(:name, '-').to_s.underscore
    candidates = ["validate_#{type_part}_#{name_part}", "validate_#{type_part}"]
    handler = candidates.find { |candidate| respond_to?(candidate) }
    send(handler, token) { |t| yield t } if handler
    yield token
  end
  eof { |t| yield t }
end
##########################################################################
# Start tag validation
##########################################################################
# Default start-tag validation: runs, in order, the unknown-tag check, the
# required-attribute check, the unknown-attribute check and the per-attribute
# value checks, yielding every ParseError token they produce.
def validate_start_tag(token)
  %w[check_unknown_start_tag
     check_start_tag_required_attributes
     check_start_tag_unknown_attributes
     check_attribute_values].each do |checker|
    send(checker, token) { |t| yield t }
  end
end
# Start-tag validation for <embed>: required attributes and attribute values
# only. The spec allows "any attributes w/o namespace" here, so the
# unknown-attribute check is intentionally not run.
def validate_start_tag_embed(token)
  %w[check_start_tag_required_attributes check_attribute_values].each do |checker|
    send(checker, token) { |t| yield t }
  end
end
# Start-tag validation for <input>. Runs the attribute value checks, then
# validates the attribute set against the element's type (default "text"):
# yields "unknown-input-type" for an unrecognised type, "unknown-attribute"
# for attributes <input> never takes, "attribute-not-allowed-on-this-input-type"
# for attributes not valid with this type, and "deprecated-attribute" where
# the type marks the attribute as deprecated.
def validate_start_tag_input(token)
  check_attribute_values(token) { |t| yield t }

  # Lower-cased name => value; tolerates a token without attribute data.
  attr_dict = {}
  (token[:data] || []).each { |name, value| attr_dict[name.downcase] = value }

  input_type = attr_dict.fetch('type', "text")
  unless @@input_type_allowed_attribute_map.has_key?(input_type)
    # "inputType" is the key the message template refers to; :attrValue is
    # retained for anything that already reads it.
    yield({:type => "ParseError",
           :data => "unknown-input-type",
           :datavars => {"inputType" => input_type,
                         :attrValue => input_type}})
  end

  allowed_attributes = @@input_type_allowed_attribute_map.fetch(input_type, [])
  deprecated_attributes = @@input_type_deprecated_attribute_map.fetch(input_type, [])
  attr_dict.each do |attr_name, _attr_value|
    if !@@allowed_attribute_map['input'].include?(attr_name)
      yield({:type => "ParseError",
             :data => "unknown-attribute",
             :datavars => {"tagName" => "input",
                           "attributeName" => attr_name}})
    elsif !allowed_attributes.include?(attr_name)
      yield({:type => "ParseError",
             :data => "attribute-not-allowed-on-this-input-type",
             :datavars => {"attributeName" => attr_name,
                           "inputType" => input_type}})
    end
    if deprecated_attributes.include?(attr_name)
      # "tagName" added because the message template refers to it.
      yield({:type => "ParseError",
             :data => "deprecated-attribute",
             :datavars => {"tagName" => "input",
                           "attributeName" => attr_name,
                           "inputType" => input_type}})
    end
  end
end
##########################################################################
# Start tag validation helpers
##########################################################################
# Yields an "unknown-start-tag" ParseError when the token's lower-cased name
# is not a key of @@allowed_attribute_map.
def check_unknown_start_tag(token)
  name = (token[:name] || "").downcase
  return if @@allowed_attribute_map.has_key?(name)
  yield({:type => "ParseError",
         :data => "unknown-start-tag",
         :datavars => {"tagName" => name}})
end
# Yields a "missing-required-attribute" ParseError for every attribute that
# @@required_attribute_map lists for this tag but the token does not carry.
# Attribute names are compared case-insensitively, matching the other
# start-tag checks.
def check_start_tag_required_attributes(token)
  name = (token[:name] || "").downcase
  required = @@required_attribute_map[name]
  return unless required
  attrs_present = (token[:data] || []).collect { |attr| attr[0].to_s.downcase }
  required.each do |attr_name|
    next if attrs_present.include?(attr_name)
    yield({:type => "ParseError",
           :data => "missing-required-attribute",
           :datavars => {"tagName" => name,
                         "attributeName" => attr_name}})
  end
end
# Yields an "unknown-attribute" ParseError for each attribute whose
# lower-cased name is neither global nor listed for this tag. A token with
# no name or no attribute data is tolerated, as in the sibling checks.
def check_start_tag_unknown_attributes(token)
  name = (token[:name] || "").downcase
  allowed_attributes = @@global_attributes | @@allowed_attribute_map.fetch(name, [])
  (token[:data] || []).each do |attr_name, _attr_value|
    next if allowed_attributes.include?(attr_name.downcase)
    yield({:type => "ParseError",
           :data => "unknown-attribute",
           :datavars => {"tagName" => name,
                         "attributeName" => attr_name}})
  end
end
##########################################################################
# Attribute validation helpers
##########################################################################
# def checkURI(token, tag_name, attr_name, attr_value)
# is_valid, error_code = rfc3987.is_valid_uri(attr_value)
# if not is_valid
# yield {:type => "ParseError",
# :data => error_code,
# :datavars => {"tagName" => tag_name,
# "attributeName" => attr_name}}
# yield {:type => "ParseError",
# :data => "invalid-attribute-value",
# :datavars => {"tagName" => tag_name,
# "attributeName" => attr_name}}
# Validates +attr_value+ with is_valid_iri. When invalid, yields two
# ParseError tokens: one carrying the specific error code returned by
# is_valid_iri, then a generic "invalid-attribute-value".
def check_iri(token, tag_name, attr_name, attr_value)
  is_valid, error_code = is_valid_iri(attr_value)
  return if is_valid
  [error_code, "invalid-attribute-value"].each do |code|
    yield({:type => "ParseError",
           :data => code,
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
  end
end
# Validates an ID-valued attribute. A nil or empty value yields
# "attribute-value-can-not-be-blank" (and nothing else). A value containing
# any HTML5::SPACE_CHARACTERS member yields "space-in-id" followed by
# "invalid-attribute-value", once.
def check_id(token, tag_name, attr_name, attr_value)
  if !attr_value || attr_value.length == 0
    yield({:type => "ParseError",
           :data => "attribute-value-can-not-be-blank",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
    # Nothing left to scan; also avoids calling each_byte on nil.
    return
  end
  attr_value.each_byte do |b|
    next unless HTML5::SPACE_CHARACTERS.include?([b].pack('c*'))
    yield({:type => "ParseError",
           :data => "space-in-id",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
    yield({:type => "ParseError",
           :data => "invalid-attribute-value",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
    break # report only the first whitespace character
  end
end
# Splits +value+ into its whitespace-separated tokens, where whitespace is
# any member of HTML5::SPACE_CHARACTERS. Returns an array of strings (empty
# for nil, empty or all-whitespace input). Works character by character so
# multi-byte characters stay intact and tokens keep the input's encoding.
def parse_token_list(value)
  tokens = []
  current = ''
  value.to_s.each_char do |c|
    if HTML5::SPACE_CHARACTERS.include?(c)
      tokens << current if current.length > 0
      current = ''
    else
      current += c
    end
  end
  tokens << current if current.length > 0
  tokens
end
# Checks a space-separated token list for repeated values. "Token" here means
# an item inside the attribute value (see
# http://www.whatwg.org/specs/web-apps/current-work/#set-of), not a tokenizer
# token. Yields one "duplicate-value-in-token-list" ParseError for the first
# repeated value and then stops.
def check_token_list(tag_name, attr_name, attr_value)
  seen = {}
  parse_token_list(attr_value).each do |item|
    if seen.has_key?(item)
      yield({:type => "ParseError",
             :data => "duplicate-value-in-token-list",
             :datavars => {"tagName" => tag_name,
                           "attributeName" => attr_name,
                           "attributeValue" => item}})
      break
    end
    seen[item] = 1
  end
end
# Validates that +attr_value+ (compared case-insensitively) is one of
# +enumerated_values+. A nil or empty value yields
# "attribute-value-can-not-be-blank"; any other non-member yields
# "invalid-enumerated-value" followed by "invalid-attribute-value".
# The caller's string is never modified.
def check_enumerated_value(token, tag_name, attr_name, attr_value, enumerated_values)
  if !attr_value || attr_value.length == 0
    yield({:type => "ParseError",
           :data => "attribute-value-can-not-be-blank",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
    return
  end
  # Non-destructive downcase: attr_value belongs to the token handed on downstream.
  return if enumerated_values.include?(attr_value.downcase)
  yield({:type => "ParseError",
         :data => "invalid-enumerated-value",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name,
                       "enumeratedValues" => enumerated_values}})
  yield({:type => "ParseError",
         :data => "invalid-attribute-value",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name}})
end
# Validates a boolean attribute: the value must be the empty string or equal
# to the attribute's own name. Otherwise yields "invalid-boolean-value"
# followed by "invalid-attribute-value".
def check_boolean(token, tag_name, attr_name, attr_value)
  enumerated_values = [attr_name, '']
  return if enumerated_values.include?(attr_value)
  yield({:type => "ParseError",
         :data => "invalid-boolean-value",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name,
                       "enumeratedValues" => enumerated_values}})
  yield({:type => "ParseError",
         :data => "invalid-attribute-value",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name}})
end
# Validates an integer-valued attribute with a small state machine:
# leading whitespace, an optional '-', at least one digit, then anything
# (trailing junk is tolerated). Yields "invalid-integer-value" at the first
# character that breaks this shape, or "attribute-value-can-not-be-blank"
# when the scan ends without any digit having been seen.
def check_integer(token, tag_name, attr_name, attr_value)
  number_string = ''
  state = 'begin' # one of 'begin', 'initial-number', 'in-number', 'trailing-junk'
  error = {:type => "ParseError",
           :data => "invalid-integer-value",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name,
                         "attributeValue" => attr_value}}
  # /./m so that newlines are examined too instead of being skipped silently;
  # to_s lets a missing (nil) value fall through to the blank error.
  attr_value.to_s.scan(/./m) do |c|
    case state
    when 'begin'
      if HTML5::SPACE_CHARACTERS.include?(c)
        next
      elsif c == '-'
        state = 'initial-number'
      elsif HTML5::DIGITS.include?(c)
        number_string += c
        state = 'in-number'
      else
        yield error
        return
      end
    when 'initial-number'
      # A sign must be followed directly by a digit.
      unless HTML5::DIGITS.include?(c)
        yield error
        return
      end
      number_string += c
      state = 'in-number'
    when 'in-number'
      if HTML5::DIGITS.include?(c)
        number_string += c
      else
        state = 'trailing-junk'
      end
    end
  end
  if number_string.length == 0
    yield({:type => "ParseError",
           :data => "attribute-value-can-not-be-blank",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
  end
end
# Placeholder: floating-point validation is not implemented yet. The method
# body is empty, so it never yields an error.
def check_floating_point_number(token, tag_name, attr_name, attr_value)
# XXX
end
# Validates a browsing-context name. Values that do not start with '_' are
# always accepted; values that do must be (case-insensitively) one of
# _self, _parent, _top or _blank, otherwise "invalid-browsing-context" is
# yielded. The caller's string is never modified.
def check_browsing_context(token, tag_name, attr_name, attr_value)
  return unless attr_value
  return unless attr_value[0, 1] == '_'
  # Non-destructive downcase: attr_value belongs to the token handed on downstream.
  return if ['_self', '_parent', '_top', '_blank'].include?(attr_value.downcase)
  yield({:type => "ParseError",
         :data => "invalid-browsing-context",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name}})
end
# Validates a language code with is_valid_lang_code. nil and the empty
# string are accepted; any other rejected value yields "invalid-lang-code".
def check_lang_code(token, tag_name, attr_name, attr_value)
  return if !attr_value || attr_value == '' # blank is OK
  return if is_valid_lang_code(attr_value)
  yield({:type => "ParseError",
         :data => "invalid-lang-code",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name,
                       "attributeValue" => attr_value}})
end
# Validates a MIME type attribute. A nil or empty value yields
# "attribute-value-can-not-be-blank"; a nil value is then not passed on to
# is_valid_mime_type. Any non-nil value rejected by is_valid_mime_type
# yields "invalid-mime-type".
def check_mime_type(token, tag_name, attr_name, attr_value)
  # XXX needs tests
  if !attr_value || attr_value.length == 0
    yield({:type => "ParseError",
           :data => "attribute-value-can-not-be-blank",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
  end
  return unless attr_value
  unless is_valid_mime_type(attr_value)
    yield({:type => "ParseError",
           :data => "invalid-mime-type",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name,
                         "attributeValue" => attr_value}})
  end
end
# Placeholder: media-query validation is not implemented yet. The method
# body is empty, so it never yields an error.
def check_media_query(token, tag_name, attr_name, attr_value)
# XXX
end
# Validates a rel attribute: first reports duplicate tokens via
# check_token_list, then yields one "invalid-rel" ParseError per token that
# is not in the allowed list (@@link_rel_values when the tag is 'link',
# @@a_rel_values otherwise).
def check_link_relation(token, tag_name, attr_name, attr_value)
  check_token_list(tag_name, attr_name, attr_value) { |t| yield t }
  allowed_values = (tag_name == 'link') ? @@link_rel_values : @@a_rel_values
  parse_token_list(attr_value).each do |rel|
    next if allowed_values.include?(rel)
    yield({:type => "ParseError",
           :data => "invalid-rel",
           :datavars => {"tagName" => tag_name,
                         "attributeName" => attr_name}})
  end
end
# Placeholder: date/time validation is not implemented yet. Only a local
# state variable is assigned; the sketched state machine below is commented
# out, so this method never yields an error.
def check_date_time(token, tag_name, attr_name, attr_value)
# XXX
state = 'begin' # ('begin', '...
# for c in attr_value
# if state == 'begin' =>
# if SPACE_CHARACTERS.include?(c)
# continue
# elsif digits.include?(c)
# state = ...
end
##########################################################################
# Attribute validation
##########################################################################
# Dispatches every attribute of +token+ to its value validator: the
# tag-specific "validate_attribute_value_<tag>_<attr>" when this object
# responds to it, otherwise the generic "validate_attribute_value_<attr>".
# Attributes with no validator are skipped. Tag and attribute names are
# lower-cased before dispatch and before being handed to the validator, so
# tag-sensitive checks (e.g. the rel list chosen for 'link') see the same
# name regardless of the case used in the markup. Yields each ParseError
# produced by the validators.
def check_attribute_values(token)
  tag_name = (token[:name] || "").downcase
  (token[:data] || []).each do |attr_name, attr_value|
    attr_name = attr_name.downcase
    attr_part = attr_name.to_s.underscore
    candidates = ["validate_attribute_value_#{tag_name.to_s.underscore}_#{attr_part}",
                  "validate_attribute_value_#{attr_part}"]
    handler = candidates.find { |candidate| respond_to?(candidate) }
    next unless handler
    send(handler, token, tag_name, attr_name, attr_value) { |t| yield t }
  end
end
# Validates the class attribute as a token list. Every error reported by
# check_token_list is yielded, each followed by a generic
# "invalid-attribute-value" ParseError.
def validate_attribute_value_class(token, tag_name, attr_name, attr_value)
  check_token_list(tag_name, attr_name, attr_value) do |problem|
    yield problem
    generic = {:type => "ParseError",
               :data => "invalid-attribute-value",
               :datavars => {"tagName" => tag_name,
                             "attributeName" => attr_name}}
    yield(generic)
  end
end
# contenteditable must be 'true', 'false' or the empty string; errors from
# check_enumerated_value are passed through.
def validate_attribute_value_contenteditable(token, tag_name, attr_name, attr_value)
  allowed = ['true', 'false', '']
  check_enumerated_value(token, tag_name, attr_name, attr_value, allowed) { |t| yield t }
end
# dir must be 'ltr' or 'rtl'; errors from check_enumerated_value are passed
# through.
def validate_attribute_value_dir(token, tag_name, attr_name, attr_value)
  allowed = ['ltr', 'rtl']
  check_enumerated_value(token, tag_name, attr_name, attr_value, allowed) { |t| yield t }
end
# draggable must be 'true' or 'false'; errors from check_enumerated_value
# are passed through.
def validate_attribute_value_draggable(token, tag_name, attr_name, attr_value)
  allowed = ['true', 'false']
  check_enumerated_value(token, tag_name, attr_name, attr_value, allowed) { |t| yield t }
end
# Global attributes whose validation is fully covered by a generic checker:
# 'irrelevant' is a boolean attribute, 'lang' a language code.
alias validate_attribute_value_irrelevant check_boolean
alias validate_attribute_value_lang check_lang_code
# Validates contextmenu as an ID (errors from check_id are passed through)
# and records the token in @things_that_point_to_an_id so that eof can later
# verify the referenced ID.
def validate_attribute_value_contextmenu(token, tag_name, attr_name, attr_value)
  check_id(token, tag_name, attr_name, attr_value) { |t| yield t }
  @things_that_point_to_an_id.push(token)
end
# Validates the id attribute (errors from check_id are passed through) and
# yields "duplicate-id" when the value was seen before.
#
# Side effects: for any non-nil value, appends the value to
# @ids_we_have_known_and_loved and the token to @things_that_define_an_id,
# so eof can later check references such as contextmenu against the IDs
# that actually exist.
def validate_attribute_value_id(token, tag_name, attr_name, attr_value)
  check_id(token, tag_name, attr_name, attr_value) { |t| yield t }
  return unless attr_value
  already_defined = @ids_we_have_known_and_loved.include?(attr_value)
  if already_defined
    yield({:type => "ParseError",
           :data => "duplicate-id",
           :datavars => {"tagName" => tag_name}})
  end
  @ids_we_have_known_and_loved.push(attr_value)
  @things_that_define_an_id.push(token)
end
# tabindex is validated as an integer.
alias validate_attribute_value_tabindex check_integer
# Placeholder: validation of the 'ref' attribute is not implemented yet
# (empty body, never yields).
def validate_attribute_value_ref(token, tag_name, attr_name, attr_value)
# XXX
end
# Placeholder: validation of the 'template' attribute is not implemented yet
# (empty body, never yields).
def validate_attribute_value_template(token, tag_name, attr_name, attr_value)
# XXX
end
# <html xmlns=...> must be exactly the XHTML namespace; any other value
# yields "invalid-root-namespace".
def validate_attribute_value_html_xmlns(token, tag_name, attr_name, attr_value)
  return if attr_value == "http://www.w3.org/1999/xhtml"
  yield({:type => "ParseError",
         :data => "invalid-root-namespace",
         :datavars => {"tagName" => tag_name,
                       "attributeName" => attr_name}})
end
# Tag-specific attribute validators that map directly onto a generic checker.
# Method names follow "validate_attribute_value_<tag>_<attribute>", the form
# check_attribute_values looks up first.
alias validate_attribute_value_base_href check_iri
alias validate_attribute_value_base_target check_browsing_context
alias validate_attribute_value_link_href check_iri
alias validate_attribute_value_link_rel check_link_relation
alias validate_attribute_value_link_media check_media_query
alias validate_attribute_value_link_hreflang check_lang_code
alias validate_attribute_value_link_type check_mime_type
# XXX <meta> attributes
alias validate_attribute_value_style_media check_media_query
alias validate_attribute_value_style_type check_mime_type
alias validate_attribute_value_style_scoped check_boolean
alias validate_attribute_value_blockquote_cite check_iri
alias validate_attribute_value_ol_start check_integer
alias validate_attribute_value_li_value check_integer
# XXX need tests from here on
# <a href> is validated as an IRI, <a target> as a browsing-context name.
alias validate_attribute_value_a_href check_iri
alias validate_attribute_value_a_target check_browsing_context
# Validates <a ping>: the value is a space-separated list, and each entry is
# checked individually with check_iri. Errors from check_iri are passed
# through.
def validate_attribute_value_a_ping(token, tag_name, attr_name, attr_value)
  parse_token_list(attr_value).each do |current_value|
    check_iri(token, tag_name, attr_name, current_value) { |t| yield t }
  end
end
# Remaining tag-specific validators that map directly onto a generic checker
# (several of those checkers are still empty placeholders, e.g.
# check_floating_point_number, check_date_time and check_media_query).
alias validate_attribute_value_a_rel check_link_relation
alias validate_attribute_value_a_media check_media_query
alias validate_attribute_value_a_hreflang check_lang_code
alias validate_attribute_value_a_type check_mime_type
alias validate_attribute_value_q_cite check_iri
alias validate_attribute_value_time_datetime check_date_time
alias validate_attribute_value_meter_value check_floating_point_number
alias validate_attribute_value_meter_min check_floating_point_number
alias validate_attribute_value_meter_low check_floating_point_number
alias validate_attribute_value_meter_high check_floating_point_number
alias validate_attribute_value_meter_max check_floating_point_number
alias validate_attribute_value_meter_optimum check_floating_point_number
alias validate_attribute_value_progress_value check_floating_point_number
alias validate_attribute_value_progress_max check_floating_point_number
alias validate_attribute_value_ins_cite check_iri
alias validate_attribute_value_ins_datetime check_date_time
alias validate_attribute_value_del_cite check_iri
alias validate_attribute_value_del_datetime check_date_time
##########################################################################
# Whole document validation (IDs, etc.)
##########################################################################
# Whole-document checks, run once after the last token. For every token
# recorded as pointing to an ID via contextmenu:
#   * yields "id-does-not-exist" when the referenced ID was never defined;
#   * otherwise yields "contextmenu-must-point-to-menu" when the element
#     defining that ID is not a <menu>.
#
# Relies on token[:data] being a Hash by the time this runs (the original
# comment notes the parser has normalised the attribute list into a dict by
# then) -- NOTE(review): confirm this holds for every caller.
def eof
  @things_that_point_to_an_id.each do |token|
    tag_name = token.fetch(:name, "").downcase
    attrs_dict = token[:data] # by now html5parser has "normalized" the attrs list into a dict.
                              # hooray for obscure side effects!
    attr_value = attrs_dict.fetch("contextmenu", "")
    if attr_value && !@ids_we_have_known_and_loved.include?(attr_value)
      yield({:type => "ParseError",
             :data => "id-does-not-exist",
             :datavars => {"tagName" => tag_name,
                           "attributeName" => "contextmenu",
                           "attributeValue" => attr_value}})
    else
      @things_that_define_an_id.each do |ref_token|
        id = ref_token.fetch(:data, {}).fetch("id", "")
        next unless id
        next unless id == attr_value
        # The first element defining this ID decides the outcome.
        if ref_token.fetch(:name, "").downcase != "menu"
          yield({:type => "ParseError",
                 :data => "contextmenu-must-point-to-menu"})
        end
        break
      end
    end
  end
end
end

View file

@ -100,17 +100,17 @@ module HTML5
method = 'process%s' % token[:type]
case token[:type]
when :Characters, :SpaceCharacters, :Comment
@phase.send method, token[:data]
when :StartTag
@phase.send method, token[:name], token[:data]
when :EndTag
@phase.send method, token[:name]
when :Doctype
@phase.send method, token[:name], token[:publicId],
token[:systemId], token[:correct]
else
parse_error(token[:data])
when :Characters, :SpaceCharacters, :Comment
@phase.send method, token[:data]
when :StartTag
@phase.send method, token[:name], token[:data]
when :EndTag
@phase.send method, token[:name]
when :Doctype
@phase.send method, token[:name], token[:publicId],
token[:systemId], token[:correct]
else
parse_error(token[:data], token[:datavars])
end
end
@ -147,9 +147,9 @@ module HTML5
@tree.get_fragment
end
def parse_error(data = 'XXX ERROR MESSAGE NEEDED')
def parse_error(code = 'XXX-undefined-error', data = {})
# XXX The idea is to make data mandatory.
@errors.push([@tokenizer.stream.position, data])
@errors.push([@tokenizer.stream.position, code, data])
raise ParseError if @strict
end
@ -163,7 +163,7 @@ module HTML5
# thing and if it doesn't it's wrong for everyone.
unless VOID_ELEMENTS.include?(token[:name])
parse_error(_('Solidus (/) incorrectly placed in tag.'))
parse_error("incorrectly-placed-solidus")
end
token[:type] = :StartTag
@ -181,7 +181,7 @@ module HTML5
end
elsif token[:type] == :EndTag
parse_error(_('End tag contains unexpected attributes.')) unless token[:data].empty?
parse_error("attributes-in-end-tag") unless token[:data].empty?
token[:name] = token[:name].downcase
end

View file

@ -12,13 +12,13 @@ module HTML5
end
def processCharacters(data)
parse_error(_('Unexpected non-space characters in the after body phase.'))
parse_error("unexpected-char-after-body")
@parser.phase = @parser.phases[:inBody]
@parser.phase.processCharacters(data)
end
def processStartTag(name, attributes)
parse_error(_("Unexpected start tag token (#{name}) in the after body phase."))
parse_error("unexpected-start-tag-after-body", {"name" => name})
@parser.phase = @parser.phases[:inBody]
@parser.phase.processStartTag(name, attributes)
end
@ -37,7 +37,7 @@ module HTML5
end
def endTagOther(name)
parse_error(_("Unexpected end tag token (#{name}) in the after body phase."))
parse_error("unexpected-end-tag-after-body", {"name" => name})
@parser.phase = @parser.phases[:inBody]
@parser.phase.processEndTag(name)
end

View file

@ -10,7 +10,7 @@ module HTML5
handle_end 'html'
def processCharacters(data)
parse_error(_('Unexpected non-space characters in the after frameset phase. Ignored.'))
parse_error("unexpected-char-after-frameset")
end
def startTagNoframes(name, attributes)
@ -18,7 +18,7 @@ module HTML5
end
def startTagOther(name, attributes)
parse_error(_("Unexpected start tag (#{name}) in the after frameset phase. Ignored."))
parse_error("unexpected-start-tag-after-frameset", {"name" => name})
end
def endTagHtml(name)
@ -27,8 +27,7 @@ module HTML5
end
def endTagOther(name)
parse_error(_("Unexpected end tag (#{name}) in the after frameset phase. Ignored."))
parse_error("unexpected-end-tag-after-frameset", {"name" => name})
end
end
end

View file

@ -26,7 +26,7 @@ module HTML5
end
def startTagFromHead(name, attributes)
parse_error(_("Unexpected start tag (#{name}) that can be in head. Moved."))
parse_error("unexpected-start-tag-out-of-my-head", {"name" => name})
@parser.phase = @parser.phases[:inHead]
@parser.phase.processStartTag(name, attributes)
end

View file

@ -34,7 +34,7 @@ module HTML5
end
def endTagOther(name)
parse_error(_("Unexpected end tag (#{name}) after the (implied) root element."))
parse_error("end-tag-after-implied-root", {"name" => name})
end
end

View file

@ -100,12 +100,12 @@ module HTML5
end
def startTagTitle(name, attributes)
parse_error(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
parse_error("unexpected-start-tag-out-of-my-head", {"name" => name})
@parser.phases[:inHead].processStartTag(name, attributes)
end
def startTagBody(name, attributes)
parse_error(_('Unexpected start tag (body).'))
parse_error("unexpected-start-tag", {"name" => "body"})
if (@tree.open_elements.length == 1 || @tree.open_elements[1].name != 'body')
assert @parser.inner_html
@ -126,7 +126,7 @@ module HTML5
def startTagForm(name, attributes)
if @tree.formPointer
parse_error(_('Unexpected start tag (form). Ignored.'))
# Report an error code (with the tag name as a data variable) like the other
# parse_error calls; the literal message text is not a valid error code.
parse_error("unexpected-start-tag", {"name" => "form"})
else
endTagP('p') if in_scope?('p')
@tree.insert_element(name, attributes)
@ -143,7 +143,10 @@ module HTML5
if stopName.include?(node.name)
poppedNodes = (0..i).collect { @tree.open_elements.pop }
if i >= 1
parse_error(_("Missing end tag%s (%s)" % [(i>1 ? 's' : ''), poppedNodes.reverse.map{|item| item.name}.join(', ')]))
# poppedNodes holds i + 1 entries; the last one is the node that matched
# stopName. Only the i nodes popped before it are reported, so the
# singular/plural code chosen from i agrees with the number of names listed.
parse_error(
i == 1 ? "missing-end-tag" : "missing-end-tags",
{"name" => poppedNodes[0...-1].collect{|n| n.name}.join(", ")})
end
break
end
@ -169,7 +172,7 @@ module HTML5
# Uncomment the following for IE7 behavior:
# HEADING_ELEMENTS.each do |element|
# if in_scope?(element)
# parse_error(_("Unexpected start tag (#{name})."))
# parse_error("unexpected-start-tag", {"name" => name})
#
# remove_open_elements_until do |element|
# HEADING_ELEMENTS.include?(element.name)
@ -183,7 +186,7 @@ module HTML5
def startTagA(name, attributes)
if afeAElement = @tree.elementInActiveFormattingElements('a')
parse_error(_('Unexpected start tag (a) implies end tag (a).'))
parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "a", "endName" => "a"})
endTagFormatting('a')
@tree.open_elements.delete(afeAElement) if @tree.open_elements.include?(afeAElement)
@tree.activeFormattingElements.delete(afeAElement) if @tree.activeFormattingElements.include?(afeAElement)
@ -200,7 +203,7 @@ module HTML5
def startTagNobr(name, attributes)
@tree.reconstructActiveFormattingElements
if in_scope?('nobr')
parse_error(_('Unexpected start tag (nobr) implies end tag (nobr).'))
parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "nobr", "endName" => "nobr"})
processEndTag('nobr')
# XXX Need tests that trigger the following
@tree.reconstructActiveFormattingElements
@ -210,7 +213,7 @@ module HTML5
def startTagButton(name, attributes)
if in_scope?('button')
parse_error(_('Unexpected start tag (button) implied end tag (button).'))
parse_error("unexpected-start-tag-implies-end-tag", {"startName" => "button", "endName" => "button"})
processEndTag('button')
@parser.phase.processStartTag(name, attributes)
else
@ -252,7 +255,7 @@ module HTML5
def startTagImage(name, attributes)
# No really...
parse_error(_('Unexpected start tag (image). Treated as img.'))
parse_error("unexpected-start-tag-treated-as", {"originalName" => "image", "newName" => "img"})
processStartTag('img', attributes)
end
@ -267,7 +270,7 @@ module HTML5
end
def startTagIsindex(name, attributes)
parse_error(_("Unexpected start tag isindex. Don't use it!"))
parse_error("deprecated-tag", {"name" => "isindex"})
return if @tree.formPointer
processStartTag('form', {})
processStartTag('hr', {})
@ -310,13 +313,13 @@ module HTML5
# "caption", "col", "colgroup", "frame", "frameset", "head",
# "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
# "tr", "noscript"
parse_error(_("Unexpected start tag (#{name}). Ignored."))
parse_error("unexpected-start-tag-ignored", {"name" => name})
end
def startTagNew(name, attributes)
# New HTML5 elements, "event-source", "section", "nav",
# "article", "aside", "header", "footer", "datagrid", "command"
sys.stderr.write("Warning: Undefined behaviour for start tag #{name}")
# $stderr.puts("Warning: Undefined behaviour for start tag #{name}")
startTagOther(name, attributes)
#raise NotImplementedError
end
@ -328,7 +331,7 @@ module HTML5
def endTagP(name)
@tree.generateImpliedEndTags('p') if in_scope?('p')
parse_error(_('Unexpected end tag (p).')) unless @tree.open_elements.last.name == 'p'
parse_error("unexpected-end-tag", {"name" => "p"}) unless @tree.open_elements.last.name == 'p'
if in_scope?('p')
@tree.open_elements.pop while in_scope?('p')
else
@ -347,7 +350,9 @@ module HTML5
return
end
unless @tree.open_elements.last.name == 'body'
parse_error(_("Unexpected end tag (body). Missing end tag (#{@tree.open_elements[-1].name})."))
parse_error("expected-one-end-tag-but-got-another",
{"expectedName" => "body",
"gotName" => @tree.open_elements.last.name})
end
@parser.phase = @parser.phases[:afterBody]
end
@ -364,7 +369,7 @@ module HTML5
@tree.generateImpliedEndTags if in_scope?(name)
unless @tree.open_elements.last.name == name
parse_error(_("End tag (#{name}) seen too early. Expected other end tag."))
parse_error("end-tag-too-early", {"name" => name})
end
if in_scope?(name)
@ -377,7 +382,7 @@ module HTML5
@tree.generateImpliedEndTags
end
if @tree.open_elements.last.name != name
parse_error(_("End tag (form) seen too early. Ignored."))
parse_error("end-tag-too-early-ignored", {"name" => "form"})
else
@tree.open_elements.pop
end
@ -389,7 +394,7 @@ module HTML5
@tree.generateImpliedEndTags(name) if in_scope?(name)
unless @tree.open_elements.last.name == name
parse_error(_("End tag (#{name}) seen too early. " + 'Expected other end tag.'))
parse_error("end-tag-too-early", {"name" => name})
end
remove_open_elements_until(name) if in_scope?(name)
@ -404,7 +409,7 @@ module HTML5
end
unless @tree.open_elements.last.name == name
parse_error(_("Unexpected end tag (#{name}). Expected other end tag."))
parse_error("end-tag-too-early", {"name" => name})
end
HEADING_ELEMENTS.each do |element|
@ -423,18 +428,18 @@ module HTML5
# Step 1 paragraph 1
afeElement = @tree.elementInActiveFormattingElements(name)
if !afeElement or (@tree.open_elements.include?(afeElement) && !in_scope?(afeElement.name))
parse_error(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm."))
parse_error("adoption-agency-1.1", {"name" => name})
return
# Step 1 paragraph 2
elsif not @tree.open_elements.include?(afeElement)
parse_error(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm."))
parse_error("adoption-agency-1.2", {"name" => name})
@tree.activeFormattingElements.delete(afeElement)
return
end
# Step 1 paragraph 3
if afeElement != @tree.open_elements.last
parse_error(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm."))
parse_error("adoption-agency-1.3", {"name" => name})
end
# Step 2
@ -531,7 +536,7 @@ module HTML5
@tree.generateImpliedEndTags if in_scope?(name)
unless @tree.open_elements.last.name == name
parse_error(_("Unexpected end tag (#{name}). Expected other end tag first."))
parse_error("end-tag-too-early", {"name" => name})
end
if in_scope?(name)
@ -543,11 +548,12 @@ module HTML5
def endTagMisplaced(name)
# This handles elements with end tags in other insertion modes.
parse_error(_("Unexpected end tag (#{name}). Ignored."))
parse_error("unexpected-end-tag", {"name" => name})
end
def endTagBr(name)
parse_error(_("Unexpected end tag (br). Treated as br element."))
parse_error("unexpected-end-tag-treated-as",
{"originalName" => "br", "newName" => "br element"})
@tree.reconstructActiveFormattingElements
@tree.insert_element(name, {})
@tree.open_elements.pop()
@ -555,21 +561,21 @@ module HTML5
def endTagNone(name)
# This handles elements with no end tag.
parse_error(_("This tag (#{name}) has no end tag"))
parse_error("no-end-tag", {"name" => name})
end
def endTagCdataTextAreaXmp(name)
if @tree.open_elements.last.name == name
@tree.open_elements.pop
else
parse_error(_("Unexpected end tag (#{name}). Ignored."))
parse_error("unexpected-end-tag", {"name" => name})
end
end
def endTagNew(name)
# New HTML5 elements, "event-source", "section", "nav",
# "article", "aside", "header", "footer", "datagrid", "command"
STDERR.puts "Warning: Undefined behaviour for end tag #{name}"
# STDERR.puts "Warning: Undefined behaviour for end tag #{name}"
endTagOther(name)
#raise NotImplementedError
end
@ -581,7 +587,7 @@ module HTML5
@tree.generateImpliedEndTags
unless @tree.open_elements.last.name == name
parse_error(_("Unexpected end tag (#{name})."))
parse_error("unexpected-end-tag", {"name" => name})
end
remove_open_elements_until {|element| element == node }
@ -589,7 +595,7 @@ module HTML5
break
else
if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
parse_error(_("Unexpected end tag (#{name}). Ignored."))
parse_error("unexpected-end-tag", {"name" => name})
break
end
end

View file

@ -39,7 +39,9 @@ module HTML5
@tree.generateImpliedEndTags
unless @tree.open_elements[-1].name == 'caption'
parse_error(_("Unexpected end tag (caption). Missing end tags."))
# Keys must be spelled "gotName" / "expectedName", matching the other
# "expected ... but got ..." style calls, so the message can be filled in.
parse_error("expected-one-end-tag-but-got-another",
{"gotName" => "caption",
"expectedName" => @tree.open_elements.last.name})
end
remove_open_elements_until('caption')
@ -57,7 +59,7 @@ module HTML5
end
def endTagIgnore(name)
parse_error(_("Unexpected end tag (#{name}). Ignored."))
parse_error("unexpected-end-tag", {"name" => name})
end
def endTagOther(name)

View file

@ -33,7 +33,7 @@ module HTML5
if in_scope?(name, true)
@tree.generateImpliedEndTags(name)
if @tree.open_elements.last.name != name
parse_error("Got table cell end tag (#{name}) while required end tags are missing.")
parse_error("unexpected-cell-end-tag", {"name" => name})
remove_open_elements_until(name)
else
@ -42,12 +42,12 @@ module HTML5
@tree.clearActiveFormattingElements
@parser.phase = @parser.phases[:inRow]
else
parse_error(_("Unexpected end tag (#{name}). Ignored."))
parse_error("unexpected-end-tag", {"name" => name})
end
end
def endTagIgnore(name)
parse_error(_("Unexpected end tag (#{name}). Ignored."))
parse_error("unexpected-end-tag", {"name" => name})
end
def endTagImply(name)

View file

@ -42,7 +42,7 @@ module HTML5
end
def endTagCol(name)
parse_error(_('Unexpected end tag (col). col has no end tag.'))
parse_error("no-end-tag", {"name" => "col"})
end
def endTagOther(name)

View file

@ -10,7 +10,7 @@ module HTML5
handle_end 'frameset', 'noframes'
def processCharacters(data)
parse_error(_('Unexpected characters in the frameset phase. Characters ignored.'))
parse_error("unexpected-char-in-frameset")
end
def startTagFrameset(name, attributes)
@ -27,13 +27,14 @@ module HTML5
end
def startTagOther(name, attributes)
parse_error(_("Unexpected start tag token (#{name}) in the frameset phase. Ignored"))
parse_error("unexpected-start-tag-in-frameset",
{"name" => name})
end
def endTagFrameset(name)
if @tree.open_elements.last.name == 'html'
# inner_html case
parse_error(_("Unexpected end tag token (frameset) in the frameset phase (inner_html)."))
parse_error("unexpected-frameset-in-frameset-innerhtml")
else
@tree.open_elements.pop
end
@ -50,8 +51,7 @@ module HTML5
end
def endTagOther(name)
parse_error(_("Unexpected end tag token (#{name}) in the frameset phase. Ignored."))
parse_error("unexpected-end-tag-in-frameset", {"name" => name})
end
end
end

View file

@ -12,7 +12,7 @@ module HTML5
def process_eof
if ['title', 'style', 'script'].include?(name = @tree.open_elements.last.name)
parse_error(_("Unexpected end of file. Expected end tag (#{name})."))
parse_error("expected-named-closing-tag-but-got-eof", {"name" => @tree.open_elements.last.name})
@tree.open_elements.pop
end
anything_else
@ -29,7 +29,7 @@ module HTML5
end
def startTagHead(name, attributes)
parse_error(_('Unexpected start tag head in existing head. Ignored'))
parse_error("two-heads-are-not-better-than-one")
end
def startTagTitle(name, attributes)
@ -93,7 +93,7 @@ module HTML5
if @tree.open_elements.last.name == 'head'
@tree.open_elements.pop
else
parse_error(_("Unexpected end tag (head). Ignored."))
parse_error("unexpected-end-tag", {"name" => "head"})
end
@parser.phase = @parser.phases[:afterHead]
end
@ -107,12 +107,12 @@ module HTML5
if @tree.open_elements.last.name == name
@tree.open_elements.pop
else
parse_error(_("Unexpected end tag (#{name}). Ignored."))
parse_error("unexpected-end-tag", {"name" => name})
end
end
def endTagOther(name)
parse_error(_("Unexpected end tag (#{name}). Ignored."))
parse_error("unexpected-end-tag", {"name" => name})
end
def anything_else

View file

@ -62,7 +62,8 @@ module HTML5
end
def endTagIgnore(name)
parse_error(_("Unexpected end tag (#{name}) in the row phase. Ignored."))
parse_error("unexpected-end-tag-in-table-row",
{"name" => name})
end
def endTagOther(name)
@ -74,7 +75,8 @@ module HTML5
# XXX unify this with other table helper methods
def clearStackToTableRowContext
until %w[tr html].include?(name = @tree.open_elements.last.name)
parse_error(_("Unexpected implied end tag (#{name}) in the row phase."))
parse_error("unexpected-implied-end-tag-in-table-row",
{"name" => @tree.open_elements.last.name})
@tree.open_elements.pop
end
end

View file

@ -26,19 +26,19 @@ module HTML5
end
def startTagSelect(name, attributes)
parse_error(_('Unexpected start tag (select) in the select phase implies select start tag.'))
parse_error("unexpected-select-in-select")
endTagSelect('select')
end
def startTagOther(name, attributes)
parse_error(_('Unexpected start tag token (#{name}) in the select phase. Ignored.'))
parse_error("unexpected-start-tag-in-select", {"name" => name})
end
def endTagOption(name)
if @tree.open_elements.last.name == 'option'
@tree.open_elements.pop
else
parse_error(_('Unexpected end tag (option) in the select phase. Ignored.'))
parse_error("unexpected-end-tag-in-select", {"name" => "option"})
end
end
@ -52,7 +52,8 @@ module HTML5
@tree.open_elements.pop
# But nothing else
else
parse_error(_('Unexpected end tag (optgroup) in the select phase. Ignored.'))
parse_error("unexpected-end-tag-in-select",
{"name" => "optgroup"})
end
end
@ -68,7 +69,8 @@ module HTML5
end
def endTagTableElements(name)
parse_error(_("Unexpected table end tag (#{name}) in the select phase."))
parse_error("unexpected-end-tag-in-select",
{"name" => name})
if in_scope?(name, true)
endTagSelect('select')

View file

@ -20,7 +20,7 @@ module HTML5
end
def startTagTableCell(name, attributes)
parse_error(_("Unexpected table cell start tag (#{name}) in the table body phase."))
parse_error("unexpected-cell-in-table-body", {"name" => name})
startTagTr('tr', {})
@parser.phase.processStartTag(name, attributes)
end
@ -47,7 +47,8 @@ module HTML5
@tree.open_elements.pop
@parser.phase = @parser.phases[:inTable]
else
parse_error(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
parse_error("unexpected-end-tag-in-table-body",
{"name" => name})
end
end
@ -63,7 +64,8 @@ module HTML5
end
def endTagIgnore(name)
parse_error(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
parse_error("unexpected-end-tag-in-table-body",
{"name" => name})
end
def endTagOther(name)
@ -74,7 +76,8 @@ module HTML5
def clearStackToTableBodyContext
until %w[tbody tfoot thead html].include?(name = @tree.open_elements.last.name)
parse_error(_("Unexpected implied end tag (#{name}) in the table body phase."))
parse_error("unexpected-implied-end-tag-in-table",
{"name" => @tree.open_elements.last.name})
@tree.open_elements.pop
end
end

View file

@ -12,7 +12,7 @@ module HTML5
handle_end 'table', %w( body caption col colgroup html tbody td tfoot th thead tr ) => 'Ignore'
def processCharacters(data)
parse_error(_("Unexpected non-space characters in table context caused voodoo mode."))
parse_error("unexpected-char-implies-table-voodoo")
# Make all the special element rearranging voodoo kick in
@tree.insert_from_table = true
# Process the character in the "in body" mode
@ -50,13 +50,15 @@ module HTML5
end
def startTagTable(name, attributes)
parse_error(_("Unexpected start tag (table) in table phase. Implies end tag (table)."))
parse_error("unexpected-start-tag-implies-end-tag",
{"startName" => "table", "endName" => "table"})
@parser.phase.processEndTag('table')
@parser.phase.processStartTag(name, attributes) unless @parser.inner_html
end
def startTagOther(name, attributes)
parse_error(_("Unexpected start tag (#{name}) in table context caused voodoo mode."))
parse_error("unexpected-start-tag-implies-table-voodoo",
{"name" => name})
# Make all the special element rearranging voodoo kick in
@tree.insert_from_table = true
# Process the start tag in the "in body" mode
@ -69,7 +71,9 @@ module HTML5
@tree.generateImpliedEndTags
unless @tree.open_elements.last.name == 'table'
parse_error(_("Unexpected end tag (table). Expected end tag (#{@tree.open_elements.last.name})."))
parse_error("end-tag-too-early-named",
{"gotName" => "table",
"expectedName" => @tree.open_elements.last.name})
end
remove_open_elements_until('table')
@ -83,11 +87,11 @@ module HTML5
end
def endTagIgnore(name)
parse_error(_("Unexpected end tag (#{name}). Ignored."))
parse_error("unexpected-end-tag", {"name" => name})
end
def endTagOther(name)
parse_error(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
parse_error("unexpected-end-tag-implies-table-voodoo", {"name" => name})
# Make all the special element rearranging voodoo kick in
@tree.insert_from_table = true
# Process the end tag in the "in body" mode
@ -100,7 +104,8 @@ module HTML5
def clearStackToTableContext
# "clear the stack back to a table context"
until %w[table html].include?(name = @tree.open_elements.last.name)
parse_error(_("Unexpected implied end tag (#{name}) in the table phase."))
parse_error("unexpected-implied-end-tag-in-table",
{"name" => @tree.open_elements.last.name})
@tree.open_elements.pop
end
# When the current node is <html> it's an inner_html case

View file

@ -8,7 +8,7 @@ module HTML5
# "quirks mode". It is expected that a future version of HTML5 will define this.
def process_eof
parse_error(_('Unexpected End of file. Expected DOCTYPE.'))
parse_error("expected-doctype-but-got-eof")
@parser.phase = @parser.phases[:rootElement]
@parser.phase.process_eof
end
@ -19,7 +19,7 @@ module HTML5
def processDoctype(name, publicId, systemId, correct)
if name.downcase != 'html' or publicId or systemId
parse_error(_('Erroneous DOCTYPE.'))
parse_error("unknown-doctype")
end
# XXX need to update DOCTYPE tokens
@tree.insertDoctype(name, publicId, systemId)
@ -113,22 +113,21 @@ module HTML5
end
def processCharacters(data)
parse_error(_('Unexpected non-space characters. Expected DOCTYPE.'))
parse_error("expected-doctype-but-got-chars")
@parser.phase = @parser.phases[:rootElement]
@parser.phase.processCharacters(data)
end
def processStartTag(name, attributes)
parse_error(_("Unexpected start tag (#{name}). Expected DOCTYPE."))
parse_error("expected-doctype-but-got-start-tag", {"name" => name})
@parser.phase = @parser.phases[:rootElement]
@parser.phase.processStartTag(name, attributes)
end
def processEndTag(name)
parse_error(_("Unexpected end tag (#{name}). Expected DOCTYPE."))
parse_error("expected-doctype-but-got-end-tag", {"name" => name})
@parser.phase = @parser.phases[:rootElement]
@parser.phase.processEndTag(name)
end
end
end

View file

@ -87,13 +87,13 @@ module HTML5
@tree.generateImpliedEndTags
if @tree.open_elements.length > 2
parse_error(_('Unexpected end of file. Missing closing tags.'))
parse_error("expected-closing-tag-but-got-eof")
elsif @tree.open_elements.length == 2 and @tree.open_elements[1].name != 'body'
# This happens for framesets or something?
parse_error(_("Unexpected end of file. Expected end tag (#{@tree.open_elements[1].name}) first."))
parse_error("expected-closing-tag-but-got-eof")
elsif @parser.inner_html and @tree.open_elements.length > 1
# XXX This is not what the specification says. Not sure what to do here.
parse_error(_('XXX inner_html EOF'))
parse_error("eof-in-innerhtml")
end
# Betting ends.
end
@ -105,7 +105,7 @@ module HTML5
end
def processDoctype(name, publicId, systemId, correct)
parse_error(_('Unexpected DOCTYPE. Ignored.'))
parse_error("unexpected-doctype")
end
def processSpaceCharacters(data)
@ -118,7 +118,7 @@ module HTML5
def startTagHtml(name, attributes)
if @parser.first_start_tag == false and name == 'html'
parse_error(_('html needs to be the first start tag.'))
parse_error("non-html-root")
end
# XXX Need a check here to see if the first start tag token emitted is
# this token... If it's not, invoke parse_error.
@ -134,10 +134,6 @@ module HTML5
send self.class.end_tag_handlers[name], name
end
def _(string)
string
end
# Internal sanity check: raises AssertionError when +value+ is false or nil,
# and returns nil otherwise.
#
# Uses +raise+ rather than +throw+: +throw+ belongs to catch/throw control
# flow and, with no matching +catch+, fails with an unrelated error instead
# of the intended AssertionError.
# NOTE(review): AssertionError is not defined in this hunk; presumably it is
# declared elsewhere in the library - confirm.
def assert(value)
  raise AssertionError.new unless value
end

View file

@ -15,19 +15,19 @@ module HTML5
end
def processCharacters(data)
parse_error(_('Unexpected non-space characters. Expected end of file.'))
parse_error("expected-eof-but-got-char")
@parser.phase = @parser.last_phase
@parser.phase.processCharacters(data)
end
def processStartTag(name, attributes)
parse_error(_('Unexpected start tag (#{name}). Expected end of file.'))
parse_error("expected-eof-but-got-start-tag", {"name" => name})
@parser.phase = @parser.last_phase
@parser.phase.processStartTag(name, attributes)
end
def processEndTag(name)
parse_error(_('Unexpected end tag (#{name}). Expected end of file.'))
parse_error("expected-eof-but-got-end-tag", {"name" => name})
@parser.phase = @parser.last_phase
@parser.phase.processEndTag(name)
end

View file

@ -198,8 +198,9 @@ module HTML5
end
end
#TODO: huh?
require 'delegate'
@raw_stream = SimpleDelegator.new(@raw_stream)
# @raw_stream = SimpleDelegator.new(@raw_stream)
class << @raw_stream
def read(chars=-1)
@ -315,8 +316,7 @@ module HTML5
end
when 0x00
@errors.push('null character found in input stream, ' +
'replaced with U+FFFD')
@errors.push("null-character")
[0xFFFD].pack('U') # null characters are invalid
else

View file

@ -50,7 +50,7 @@ module HTML5
when :EndTag
if token[:data]
parse_error(_("End tag contains unexpected attributes."))
parse_error("attributes-in-end-tag")
end
when :Comment
@ -81,7 +81,7 @@ module HTML5
# open and close tags are emitted
if token[:type] == :EndTag
if VOID_ELEMENTS.include? token[:name]
if @tree.open_elements[-1].name != token["name"]:
if @tree.open_elements[-1].name != token["name"]
token[:type] = :EmptyTag
token["data"] ||= {}
end

View file

@ -171,7 +171,6 @@ module HTML5
end
end
def _(string); string; end
end
# Error in serialized tree

View file

@ -69,7 +69,7 @@ module HTML5
if @current_token[:type] == :StartTag and data == ">"
@current_token[:type] = :EmptyTag
else
@token_queue << {:type => :ParseError, :data => _("Solidus (/) incorrectly placed in tag.")}
@token_queue << {:type => :ParseError, :data => "incorrectly-placed-solidus"}
end
# The character we just consumed need to be put back on the stack so it
@ -107,12 +107,12 @@ module HTML5
charAsInt = char_stack.join('').to_i(radix)
if charAsInt == 13
@token_queue << {:type => :ParseError, :data => _("Incorrect CR newline entity. Replaced with LF.")}
@token_queue << {:type => :ParseError, :data => "incorrect-cr-newline-entity"}
charAsInt = 10
elsif (128..159).include? charAsInt
# If the integer is between 127 and 160 (so 128 and bigger and 159
# and smaller) we need to do the "windows trick".
@token_queue << {:type => :ParseError, :data => _("Entity used with illegal number (windows-1252 reference).")}
@token_queue << {:type => :ParseError, :data => "illegal-windows-1252-entity"}
charAsInt = ENTITIES_WINDOWS1252[charAsInt - 128]
end
@ -121,13 +121,13 @@ module HTML5
char = [charAsInt].pack('U')
else
char = [0xFFFD].pack('U')
@token_queue << {:type => :ParseError, :data => _("Numeric entity represents an illegal codepoint.")}
@token_queue << {:type => :ParseError, :data => "cant-convert-numeric-entity", :datavars => {"charAsInt" => charAsInt}}
end
# Discard the ; if present. Otherwise, put it back on the queue and
# invoke parse_error on parser.
if c != ";"
@token_queue << {:type => :ParseError, :data => _("Numeric entity didn't end with ';'.")}
@token_queue << {:type => :ParseError, :data => "numeric-entity-without-semicolon"}
@stream.unget(c)
end
@ -147,7 +147,7 @@ module HTML5
# back in the queue
char_stack = char_stack[0...char_stack.index(:EOF)]
@stream.unget(char_stack)
@token_queue << {:type => :ParseError, :data => _("Numeric entity expected. Got end of file instead.")}
@token_queue << {:type => :ParseError, :data => "expected-numeric-entity-but-got-eof"}
else
if char_stack[1].downcase == "x" and HEX_DIGITS.include? char_stack[2]
# Hexadecimal entity detected.
@ -160,7 +160,7 @@ module HTML5
else
# No number entity detected.
@stream.unget(char_stack)
@token_queue << {:type => :ParseError, :data => _("Numeric entity expected but none found.")}
@token_queue << {:type => :ParseError, :data => "expected-numeric-entity"}
end
end
else
@ -196,10 +196,10 @@ module HTML5
# Check whether or not the last character returned can be
# discarded or needs to be put back.
if entityName[-1] != ?;
@token_queue << {:type => :ParseError, :data => _("Named entity didn't end with ';'.")}
@token_queue << {:type => :ParseError, :data => "named-entity-without-semicolon"}
end
if char_stack[-1] != ";" and from_attribute and
if entityName[-1] != ";" and from_attribute and
(ASCII_LETTERS.include?(char_stack[entityName.length]) or
DIGITS.include?(char_stack[entityName.length]))
@stream.unget(char_stack)
@ -208,7 +208,7 @@ module HTML5
@stream.unget(char_stack[entityName.length..-1])
end
else
@token_queue << {:type => :ParseError, :data => _("Named entity expected. Got none.")}
@token_queue << {:type => :ParseError, :data => "expected-named-entity"}
@stream.unget(char_stack)
end
end
@ -309,19 +309,18 @@ module HTML5
elsif data == ">"
# XXX In theory it could be something besides a tag name. But
# do we really care?
@token_queue << {:type => :ParseError, :data => _("Expected tag name. Got '>' instead.")}
@token_queue << {:type => :ParseError, :data => "expected-tag-name-but-got-right-bracket"}
@token_queue << {:type => :Characters, :data => "<>"}
@state = :data_state
elsif data == "?"
# XXX In theory it could be something besides a tag name. But
# do we really care?
@token_queue.push({:type => :ParseError, :data => _("Expected tag name. Got '?' instead (HTML doesn't " +
"support processing instructions).")})
@token_queue.push({:type => :ParseError, :data => "expected-tag-name-but-got-question-mark"})
@stream.unget(data)
@state = :bogus_comment_state
else
# XXX
@token_queue << {:type => :ParseError, :data => _("Expected tag name. Got something else instead")}
@token_queue << {:type => :ParseError, :data => "expected-tag-name"}
@token_queue << {:type => :Characters, :data => "<"}
@stream.unget(data)
@state = :data_state
@ -382,18 +381,18 @@ module HTML5
data = @stream.char
if data == :EOF
@token_queue << {:type => :ParseError, :data => _("Expected closing tag. Unexpected end of file.")}
@token_queue << {:type => :ParseError, :data => "expected-closing-tag-but-got-eof"}
@token_queue << {:type => :Characters, :data => "</"}
@state = :data_state
elsif ASCII_LETTERS.include? data
@current_token = {:type => :EndTag, :name => data, :data => []}
@state = :tag_name_state
elsif data == ">"
@token_queue << {:type => :ParseError, :data => _("Expected closing tag. Got '>' instead. Ignoring '</>'.")}
@token_queue << {:type => :ParseError, :data => "expected-closing-tag-but-got-right-bracket"}
@state = :data_state
else
# XXX data can be _'_...
@token_queue << {:type => :ParseError, :data => _("Expected closing tag. Unexpected character '#{data}' found.")}
# datavars keys are Strings everywhere else (e.g. "charAsInt", "name"); use a
# String key here too so the message lookup can find the variable.
@token_queue << {:type => :ParseError, :data => "expected-closing-tag-but-got-char", :datavars => {"data" => data}}
@stream.unget(data)
@state = :bogus_comment_state
end
@ -406,7 +405,7 @@ module HTML5
if SPACE_CHARACTERS.include? data
@state = :before_attribute_name_state
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in the tag name.")}
@token_queue << {:type => :ParseError, :data => "eof-in-tag-name"}
emit_current_token
elsif ASCII_LETTERS.include? data
@current_token[:name] += data + @stream.chars_until(ASCII_LETTERS, true)
@ -426,7 +425,7 @@ module HTML5
if SPACE_CHARACTERS.include? data
@stream.chars_until(SPACE_CHARACTERS, true)
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file. Expected attribute name instead.")}
@token_queue << {:type => :ParseError, :data => "expected-attribute-name-but-got-eof"}
emit_current_token
elsif ASCII_LETTERS.include? data
@current_token[:data].push([data, ""])
@ -449,7 +448,7 @@ module HTML5
if data == "="
@state = :before_attribute_value_state
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in attribute name.")}
@token_queue << {:type => :ParseError, :data => "eof-in-attribute-name"}
@state = :data_state
emitToken = true
elsif ASCII_LETTERS.include? data
@ -479,7 +478,7 @@ module HTML5
end
@current_token[:data][0...-1].each {|name,value|
if @current_token[:data].last.first == name
@token_queue << {:type => :ParseError, :data =>_("Dropped duplicate attribute on tag.")}
@token_queue << {:type => :ParseError, :data => "duplicate-attribute"}
break # don't report an error more than once
end
}
@ -498,7 +497,7 @@ module HTML5
elsif data == ">"
emit_current_token
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file. Expected = or end of tag.")}
@token_queue << {:type => :ParseError, :data => "expected-end-of-tag-but-got-eof"}
emit_current_token
elsif ASCII_LETTERS.include? data
@current_token[:data].push([data, ""])
@ -527,7 +526,7 @@ module HTML5
elsif data == ">"
emit_current_token
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file. Expected attribute value.")}
@token_queue << {:type => :ParseError, :data => "expected-attribute-value-but-got-eof"}
emit_current_token
else
@current_token[:data][-1][1] += data
@ -543,7 +542,7 @@ module HTML5
elsif data == "&"
process_entity_in_attribute
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in attribute value (\").")}
@token_queue << {:type => :ParseError, :data => "eof-in-attribute-value-double-quote"}
emit_current_token
else
@current_token[:data][-1][1] += data + @stream.chars_until(["\"", "&"])
@ -558,7 +557,7 @@ module HTML5
elsif data == "&"
process_entity_in_attribute
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in attribute value (').")}
@token_queue << {:type => :ParseError, :data => "eof-in-attribute-value-single-quote"}
emit_current_token
else
@current_token[:data][-1][1] += data +\
@ -576,7 +575,7 @@ module HTML5
elsif data == ">"
emit_current_token
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in attribute value.")}
@token_queue << {:type => :ParseError, :data => "eof-in-attribute-value-no-quotes"}
emit_current_token
else
@current_token[:data][-1][1] += data + @stream.chars_until(["&", ">","<"] + SPACE_CHARACTERS)
@ -609,7 +608,7 @@ module HTML5
@current_token = {:type => :Doctype, :name => "", :publicId => nil, :systemId => nil, :correct => true}
@state = :doctype_state
else
@token_queue << {:type => :ParseError, :data => _("Expected '--' or 'DOCTYPE'. Not found.")}
@token_queue << {:type => :ParseError, :data => "expected-dashes-or-doctype"}
@stream.unget(char_stack)
@state = :bogus_comment_state
end
@ -622,11 +621,11 @@ module HTML5
if data == "-"
@state = :comment_start_dash_state
elsif data == ">"
@token_queue << {:type => :ParseError, :data => _("Incorrect comment.")}
@token_queue << {:type => :ParseError, :data => "incorrect-comment"}
@token_queue << @current_token
@state = :data_state
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in comment.")}
@token_queue << {:type => :ParseError, :data => "eof-in-comment"}
@token_queue << @current_token
@state = :data_state
else
@ -641,11 +640,11 @@ module HTML5
if data == "-"
@state = :comment_end_state
elsif data == ">"
@token_queue << {:type => :ParseError, :data => _("Incorrect comment.")}
@token_queue << {:type => :ParseError, :data => "incorrect-comment"}
@token_queue << @current_token
@state = :data_state
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in comment.")}
@token_queue << {:type => :ParseError, :data => "eof-in-comment"}
@token_queue << @current_token
@state = :data_state
else
@ -660,7 +659,7 @@ module HTML5
if data == "-"
@state = :comment_end_dash_state
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in comment.")}
@token_queue << {:type => :ParseError, :data => "eof-in-comment"}
@token_queue << @current_token
@state = :data_state
else
@ -674,7 +673,7 @@ module HTML5
if data == "-"
@state = :comment_end_state
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in comment (-)")}
@token_queue << {:type => :ParseError, :data => "eof-in-comment-end-dash"}
@token_queue << @current_token
@state = :data_state
else
@ -694,15 +693,15 @@ module HTML5
@token_queue << @current_token
@state = :data_state
elsif data == "-"
@token_queue << {:type => :ParseError, :data => _("Unexpected '-' after '--' found in comment.")}
@token_queue << {:type => :ParseError, :data => "unexpected-dash-after-double-dash-in-comment"}
@current_token[:data] += data
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in comment (--).")}
@token_queue << {:type => :ParseError, :data => "eof-in-comment-double-dash"}
@token_queue << @current_token
@state = :data_state
else
# XXX
@token_queue << {:type => :ParseError, :data => _("Unexpected character in comment found.")}
@token_queue << {:type => :ParseError, :data => "unexpected-char-in-comment"}
@current_token[:data] += "--" + data
@state = :comment_state
end
@ -714,7 +713,7 @@ module HTML5
if SPACE_CHARACTERS.include? data
@state = :before_doctype_name_state
else
@token_queue << {:type => :ParseError, :data => _("No space after literal string 'DOCTYPE'.")}
@token_queue << {:type => :ParseError, :data => "need-space-after-doctype"}
@stream.unget(data)
@state = :before_doctype_name_state
end
@ -725,12 +724,12 @@ module HTML5
data = @stream.char
if SPACE_CHARACTERS.include? data
elsif data == ">"
@token_queue << {:type => :ParseError, :data => _("Unexpected > character. Expected DOCTYPE name.")}
@token_queue << {:type => :ParseError, :data => "expected-doctype-name-but-got-right-bracket"}
@current_token[:correct] = false
@token_queue << @current_token
@state = :data_state
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file. Expected DOCTYPE name.")}
@token_queue << {:type => :ParseError, :data => "expected-doctype-name-but-got-eof"}
@current_token[:correct] = false
@token_queue << @current_token
@state = :data_state
@ -749,7 +748,7 @@ module HTML5
@token_queue << @current_token
@state = :data_state
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE name.")}
@token_queue << {:type => :ParseError, :data => "eof-in-doctype-name"}
@current_token[:correct] = false
@token_queue << @current_token
@state = :data_state
@ -769,7 +768,7 @@ module HTML5
elsif data == :EOF
@current_token[:correct] = false
@stream.unget(data)
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
@token_queue << @current_token
@state = :data_state
else
@ -782,7 +781,7 @@ module HTML5
@state = :before_doctype_system_identifier_state
else
@stream.unget(char_stack)
@token_queue << {:type => :ParseError, :data => _("Expected 'public' or 'system'. Got '#{token}'")}
@token_queue << {:type => :ParseError, :data => "expected-space-or-right-bracket-in-doctype", "datavars" => {"data" => data}}
@state = :bogus_doctype_state
end
end
@ -800,17 +799,17 @@ module HTML5
@current_token[:publicId] = ""
@state = :doctype_public_identifier_single_quoted_state
elsif data == ">"
@token_queue << {:type => :ParseError, :data => _("Unexpected end of DOCTYPE.")}
@token_queue << {:type => :ParseError, :data => "unexpected-end-of-doctype"}
@current_token[:correct] = false
@token_queue << @current_token
@state = :data_state
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
@current_token[:correct] = false
@token_queue << @current_token
@state = :data_state
else
@token_queue << {:type => :ParseError, :data => _("Unexpected character in DOCTYPE.")}
@token_queue << {:type => :ParseError, :data => "unexpected-char-in-doctype"}
@state = :bogus_doctype_state
end
@ -822,7 +821,7 @@ module HTML5
if data == "\""
@state = :after_doctype_public_identifier_state
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
@current_token[:correct] = false
@token_queue << @current_token
@state = :data_state
@ -837,7 +836,7 @@ module HTML5
if data == "'"
@state = :after_doctype_public_identifier_state
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
@current_token[:correct] = false
@token_queue << @current_token
@state = :data_state
@ -860,12 +859,12 @@ module HTML5
@token_queue << @current_token
@state = :data_state
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
@current_token[:correct] = false
@token_queue << @current_token
@state = :data_state
else
@token_queue << {:type => :ParseError, :data => _("Unexpected character in DOCTYPE.")}
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
@state = :bogus_doctype_state
end
return true
@ -881,17 +880,17 @@ module HTML5
@current_token[:systemId] = ""
@state = :doctype_system_identifier_single_quoted_state
elsif data == ">"
@token_queue << {:type => :ParseError, :data => _("Unexpected character in DOCTYPE.")}
@token_queue << {:type => :ParseError, :data => "unexpected-char-in-doctype"}
@current_token[:correct] = false
@token_queue << @current_token
@state = :data_state
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
@current_token[:correct] = false
@token_queue << @current_token
@state = :data_state
else
@token_queue << {:type => :ParseError, :data => _("Unexpected character in DOCTYPE.")}
@token_queue << {:type => :ParseError, :data => "unexpected-char-in-doctype"}
@state = :bogus_doctype_state
end
return true
@ -902,7 +901,7 @@ module HTML5
if data == "\""
@state = :after_doctype_system_identifier_state
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
@current_token[:correct] = false
@token_queue << @current_token
@state = :data_state
@ -917,7 +916,7 @@ module HTML5
if data == "'"
@state = :after_doctype_system_identifier_state
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
@current_token[:correct] = false
@token_queue << @current_token
@state = :data_state
@ -934,12 +933,12 @@ module HTML5
@token_queue << @current_token
@state = :data_state
elsif data == :EOF
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in DOCTYPE.")}
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
@current_token[:correct] = false
@token_queue << @current_token
@state = :data_state
else
@token_queue << {:type => :ParseError, :data => _("Unexpected character in DOCTYPE.")}
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
@state = :bogus_doctype_state
end
return true
@ -954,7 +953,7 @@ module HTML5
elsif data == :EOF
# XXX EMIT
@stream.unget(data)
@token_queue << {:type => :ParseError, :data => _("Unexpected end of file in bogus doctype.")}
@token_queue << {:type => :ParseError, :data => "eof-in-doctype"}
@current_token[:correct] = false
@token_queue << @current_token
@state = :data_state
@ -962,7 +961,6 @@ module HTML5
return true
end
# Identity wrapper: hands back the very string it receives, unchanged.
# The tokenizer wraps its human-readable parse-error message literals in
# this call (e.g. _("Unexpected end of file in comment.")).
# NOTE(review): presumably a placeholder hook for message translation -- confirm.
def _(string)
  string
end
end
end

View file

@ -412,18 +412,5 @@
"input": "<img src='foo' title='\"foo\" bar' />",
"rexml": "<img src='foo' title='\"foo\" bar' />",
"output": "<img title='&quot;foo&quot; bar' src='foo'/>"
},
{
"name": "named_entities_in_attributes",
"input": "<img src='foo' title='&quot;foo&quot; bar' />",
"rexml": "<img src='foo' title='\"foo\" bar' />",
"output": "<img title='&quot;foo&quot; bar' src='foo'/>"
},
{
"name": "NCRs_in_attributes",
"input": "<img src='foo' title='&#x22;foo&#x22; bar' />",
"rexml": "<img src='foo' title='\"foo\" bar' />",
"output": "<img title='&quot;foo&quot; bar' src='foo'/>"
}
]

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,787 @@
{"tests": [
{"description": "base href contains invalid URI due to leading space",
"input": "<base href=' http://www.example.com/'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to trailing space",
"input": "<base href='http://www.example.com/ '",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to space in scheme",
"input": "<base href='ht tp://www.example.com/'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to space in domain",
"input": "<base href='http://www.example. com/'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to space in path",
"input": "<base href='http://www.example.com/a b'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to space in fragment",
"input": "<base href='http://www.example.com/a#b c'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to space in query",
"input": "<base href='http://www.example.com/a?b c'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to leading tab",
"input": "<base href='\thttp://www.example.com/'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to trailing tab",
"input": "<base href='http://www.example.com/\t'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to tab in scheme",
"input": "<base href='ht\ttp://www.example.com/'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to tab in domain",
"input": "<base href='http://www.example.\tcom/'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to tab in path",
"input": "<base href='http://www.example.com/a\tb'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to tab in fragment",
"input": "<base href='http://www.example.com/a#b\tc'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to tab in query",
"input": "<base href='http://www.example.com/a?b\tc'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to leading LF",
"input": "<base href='\nhttp://www.example.com/'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to LF in scheme",
"input": "<base href='ht\ntp://www.example.com/'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to LF in domain",
"input": "<base href='http://www.example.\ncom/'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to LF in path",
"input": "<base href='http://www.example.com/a\nb'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to LF in fragment",
"input": "<base href='http://www.example.com/a#b\nc'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to LF in query",
"input": "<base href='http://www.example.com/a?b\nc'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to leading LT",
"input": "<base href='\u000Bhttp://www.example.com/'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to trailing LT",
"input": "<base href='http://www.example.com/\u000B'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to LT in scheme",
"input": "<base href='ht\u000Btp://www.example.com/'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to LT in domain",
"input": "<base href='http://www.example.\u000Bcom/'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to LT in path",
"input": "<base href='http://www.example.com/a\u000Bb'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to LT in fragment",
"input": "<base href='http://www.example.com/a#b\u000Bc'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to LT in query",
"input": "<base href='http://www.example.com/a?b\u000Bc'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to leading FF",
"input": "<base href='\u000Chttp://www.example.com/'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to trailing FF",
"input": "<base href='http://www.example.com/\u000C'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to FF in scheme",
"input": "<base href='ht\u000Ctp://www.example.com/'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to FF in domain",
"input": "<base href='http://www.example.\u000Ccom/'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to FF in path",
"input": "<base href='http://www.example.com/a\u000Cb'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to FF in fragment",
"input": "<base href='http://www.example.com/a#b\u000Cc'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to FF in query",
"input": "<base href='http://www.example.com/a?b\u000Cc'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to leading CR",
"input": "<base href='\rhttp://www.example.com/'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to CR in scheme",
"input": "<base href='ht\rtp://www.example.com/'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to CR in domain",
"input": "<base href='http://www.example.\rcom/'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to CR in path",
"input": "<base href='http://www.example.com/a\rb'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to CR in fragment",
"input": "<base href='http://www.example.com/a#b\rc'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains invalid URI due to CR in query",
"input": "<base href='http://www.example.com/a?b\rc'",
"fail-unless": "invalid-uri-char"},
{"description": "base href contains valid URI scheme 'ftp'",
"input": "<base href='ftp://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'http'",
"input": "<base href='http://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'gopher'",
"input": "<base href='gopher://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'mailto'",
"input": "<base href='mailto://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'news'",
"input": "<base href='news://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'nntp'",
"input": "<base href='nntp://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'telnet'",
"input": "<base href='telnet://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'wais'",
"input": "<base href='wais://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'file'",
"input": "<base href='file://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'prospero'",
"input": "<base href='prospero://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'z39.50s'",
"input": "<base href='z39.50s://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'z39.50r'",
"input": "<base href='z39.50r://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'cid'",
"input": "<base href='cid://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'mid'",
"input": "<base href='mid://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'vemmi'",
"input": "<base href='vemmi://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'service'",
"input": "<base href='service://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'imap'",
"input": "<base href='imap://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'nfs'",
"input": "<base href='nfs://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'acap'",
"input": "<base href='acap://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'rtsp'",
"input": "<base href='rtsp://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'tip'",
"input": "<base href='tip://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'pop'",
"input": "<base href='pop://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'data'",
"input": "<base href='data://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'dav'",
"input": "<base href='dav://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'opaquelocktoken'",
"input": "<base href='opaquelocktoken://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'sip'",
"input": "<base href='sip://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'sips'",
"input": "<base href='sips://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'tel'",
"input": "<base href='tel://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'fax'",
"input": "<base href='fax://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'modem'",
"input": "<base href='modem://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'ldap'",
"input": "<base href='ldap://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'https'",
"input": "<base href='https://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'soap.beep'",
"input": "<base href='soap.beep://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'soap.beeps'",
"input": "<base href='soap.beeps://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'xmlrpc.beep'",
"input": "<base href='xmlrpc.beep://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'xmlrpc.beeps'",
"input": "<base href='xmlrpc.beeps://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'urn'",
"input": "<base href='urn://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'go'",
"input": "<base href='go://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'h323'",
"input": "<base href='h323://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'ipp'",
"input": "<base href='ipp://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'tftp'",
"input": "<base href='tftp://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'mupdate'",
"input": "<base href='mupdate://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'pres'",
"input": "<base href='pres://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'im'",
"input": "<base href='im://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'mtqp'",
"input": "<base href='mtqp://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'iris.beep'",
"input": "<base href='iris.beep://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'dict'",
"input": "<base href='dict://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'snmp'",
"input": "<base href='snmp://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'crid'",
"input": "<base href='crid://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'tag'",
"input": "<base href='tag://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'dns'",
"input": "<base href='dns://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'info'",
"input": "<base href='info://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'javascript'",
"input": "<base href='javascript:foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'FTP'",
"input": "<base href='FTP://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'HTTP'",
"input": "<base href='HTTP://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'GOPHER'",
"input": "<base href='GOPHER://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'MAILTO'",
"input": "<base href='MAILTO://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'NEWS'",
"input": "<base href='NEWS://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'NNTP'",
"input": "<base href='NNTP://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'TELNET'",
"input": "<base href='TELNET://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'WAIS'",
"input": "<base href='WAIS://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'FILE'",
"input": "<base href='FILE://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'PROSPERO'",
"input": "<base href='PROSPERO://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'Z39.50S'",
"input": "<base href='Z39.50S://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'Z39.50R'",
"input": "<base href='Z39.50R://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'CID'",
"input": "<base href='CID://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'MID'",
"input": "<base href='MID://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'VEMMI'",
"input": "<base href='VEMMI://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'SERVICE'",
"input": "<base href='SERVICE://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'IMAP'",
"input": "<base href='IMAP://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'NFS'",
"input": "<base href='NFS://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'ACAP'",
"input": "<base href='ACAP://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'RTSP'",
"input": "<base href='RTSP://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'TIP'",
"input": "<base href='TIP://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'POP'",
"input": "<base href='POP://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'DATA'",
"input": "<base href='DATA://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'DAV'",
"input": "<base href='DAV://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'OPAQUELOCKTOKEN'",
"input": "<base href='OPAQUELOCKTOKEN://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'SIP'",
"input": "<base href='SIP://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'SIPS'",
"input": "<base href='SIPS://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'TEL'",
"input": "<base href='TEL://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'FAX'",
"input": "<base href='FAX://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'MODEM'",
"input": "<base href='MODEM://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'LDAP'",
"input": "<base href='LDAP://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'HTTPS'",
"input": "<base href='HTTPS://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'SOAP.BEEP'",
"input": "<base href='SOAP.BEEP://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'SOAP.BEEPS'",
"input": "<base href='SOAP.BEEPS://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'XMLRPC.BEEP'",
"input": "<base href='XMLRPC.BEEP://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'XMLRPC.BEEPS'",
"input": "<base href='XMLRPC.BEEPS://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'URN'",
"input": "<base href='URN://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'GO'",
"input": "<base href='GO://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'H323'",
"input": "<base href='H323://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'IPP'",
"input": "<base href='IPP://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'TFTP'",
"input": "<base href='TFTP://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'MUPDATE'",
"input": "<base href='MUPDATE://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'PRES'",
"input": "<base href='PRES://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'IM'",
"input": "<base href='IM://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'MTQP'",
"input": "<base href='MTQP://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'IRIS.BEEP'",
"input": "<base href='IRIS.BEEP://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'DICT'",
"input": "<base href='DICT://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'SNMP'",
"input": "<base href='SNMP://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'CRID'",
"input": "<base href='CRID://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'TAG'",
"input": "<base href='TAG://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'DNS'",
"input": "<base href='DNS://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'INFO'",
"input": "<base href='INFO://foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains valid URI scheme 'JAVASCRIPT'",
"input": "<base href='JAVASCRIPT:foo'",
"fail-if": "invalid-scheme"},
{"description": "base href contains invalid URI scheme 'foo'",
"input": "<base href='foo:bar'",
"fail-unless": "invalid-scheme"},
{"description": "base href contains valid URI 'g'",
"input": "<base href='g'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI './g'",
"input": "<base href='./g'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'g/'",
"input": "<base href='g/'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI '/g'",
"input": "<base href='/g'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI '//g'",
"input": "<base href='//g'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI '?y'",
"input": "<base href='?y'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'g?y'",
"input": "<base href='g?y'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI '#s'",
"input": "<base href='#s'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'g#s'",
"input": "<base href='g#s'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'g?y#s'",
"input": "<base href='g?y#s'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI ';x'",
"input": "<base href=';x'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'g;x'",
"input": "<base href='g;x'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'g;x?y#s'",
"input": "<base href='g;x?y#s'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI '.'",
"input": "<base href='.'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI './'",
"input": "<base href='./'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI '..'",
"input": "<base href='..'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI '../'",
"input": "<base href='../'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI '../g'",
"input": "<base href='../g'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI '../..'",
"input": "<base href='../..'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI '../../'",
"input": "<base href='../../'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI '../../g'",
"input": "<base href='../../g'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI '../../../g'",
"input": "<base href='../../../g'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI '../../../../g'",
"input": "<base href='../../../../g'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI '/./g'",
"input": "<base href='/./g'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI '/../g'",
"input": "<base href='/../g'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'g.'",
"input": "<base href='g.'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI '.g'",
"input": "<base href='.g'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'g..'",
"input": "<base href='g..'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI '..g'",
"input": "<base href='..g'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI './../g'",
"input": "<base href='./../g'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI './g/.'",
"input": "<base href='./g/.'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'g/./h'",
"input": "<base href='g/./h'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'g/../h'",
"input": "<base href='g/../h'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'g;x=1/./y'",
"input": "<base href='g;x=1/./y'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'g;x=1/../y'",
"input": "<base href='g;x=1/../y'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'g?y/./x'",
"input": "<base href='g?y/./x'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'g?y/../x'",
"input": "<base href='g?y/../x'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'g#s/./x'",
"input": "<base href='g#s/./x'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'g#s/../x'",
"input": "<base href='g#s/../x'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI './g:h'",
"input": "<base href='./g:h'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'http://www.w%33.org'",
"input": "<base href='http://www.w%33.org'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'http://r%C3%A4ksm%C3%B6rg%C3%A5s.josefsson.org'",
"input": "<base href='http://r%C3%A4ksm%C3%B6rg%C3%A5s.josefsson.org'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'http://xn--rksmrgs-5wao1o.josefsson.org'",
"input": "<base href='http://xn--rksmrgs-5wao1o.josefsson.org'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'http://%E7%B4%8D%E8%B1%86.w3.mag.keio.ac.jp'",
"input": "<base href='http://%E7%B4%8D%E8%B1%86.w3.mag.keio.ac.jp'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'http://xn--99zt52a.w3.mag.keio.ac.jp'",
"input": "<base href='http://xn--99zt52a.w3.mag.keio.ac.jp'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'http://www.%E3%81%BB%E3%82%93%E3%81%A8%E3%81%86%E3%81%AB%E3%81%AA%E3%81%8C%E3%81%84%E3%82%8F%E3%81%91%E3%81%AE%E3%82%8F%E3%81%8B%E3%82%89%E3%81%AA%E3%81%84%E3%81%A9%E3%82%81%E3%81%84%E3%82%93%E3%82%81%E3%81%84%E3%81%AE%E3%82%89%E3%81%B9%E3%82%8B%E3%81%BE%E3%81%A0%E3%81%AA%E3%81%8C%E3%81%8F%E3%81%97%E3%81%AA%E3%81%84%E3%81%A8%E3%81%9F%E3%82%8A%E3%81%AA%E3%81%84.w3.mag.keio.ac.jp/'",
"input": "<base href='http://www.%E3%81%BB%E3%82%93%E3%81%A8%E3%81%86%E3%81%AB%E3%81%AA%E3%81%8C%E3%81%84%E3%82%8F%E3%81%91%E3%81%AE%E3%82%8F%E3%81%8B%E3%82%89%E3%81%AA%E3%81%84%E3%81%A9%E3%82%81%E3%81%84%E3%82%93%E3%82%81%E3%81%84%E3%81%AE%E3%82%89%E3%81%B9%E3%82%8B%E3%81%BE%E3%81%A0%E3%81%AA%E3%81%8C%E3%81%8F%E3%81%97%E3%81%AA%E3%81%84%E3%81%A8%E3%81%9F%E3%82%8A%E3%81%AA%E3%81%84.w3.mag.keio.ac.jp/'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'http://www.xn--n8jaaaaai5bhf7as8fsfk3jnknefdde3fg11amb5gzdb4wi9bya3kc6lra.w3.mag.keio.ac.jp/'",
"input": "<base href='http://www.xn--n8jaaaaai5bhf7as8fsfk3jnknefdde3fg11amb5gzdb4wi9bya3kc6lra.w3.mag.keio.ac.jp/'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'http://%E3%81%BB%E3%82%93%E3%81%A8%E3%81%86%E3%81%AB%E3%81%AA%E3%81%8C%E3%81%84%E3%82%8F%E3%81%91%E3%81%AE%E3%82%8F%E3%81%8B%E3%82%89%E3%81%AA%E3%81%84%E3%81%A9%E3%82%81%E3%81%84%E3%82%93%E3%82%81%E3%81%84%E3%81%AE%E3%82%89%E3%81%B9%E3%82%8B%E3%81%BE%E3%81%A0%E3%81%AA%E3%81%8C%E3%81%8F%E3%81%97%E3%81%AA%E3%81%84%E3%81%A8%E3%81%9F%E3%82%8A%E3%81%AA%E3%81%84.%E3%81%BB%E3%82%93%E3%81%A8%E3%81%86%E3%81%AB%E3%81%AA%E3%81%8C%E3%81%84%E3%82%8F%E3%81%91%E3%81%AE%E3%82%8F%E3%81%8B%E3%82%89%E3%81%AA%E3%81%84%E3%81%A9%E3%82%81%E3%81%84%E3%82%93%E3%82%81%E3%81%84%E3%81%AE%E3%82%89%E3%81%B9%E3%82%8B%E3%81%BE%E3%81%A0%E3%81%AA%E3%81%8C%E3%81%8F%E3%81%97%E3%81%AA%E3%81%84%E3%81%A8%E3%81%9F%E3%82%8A%E3%81%AA%E3%81%84.%E3%81%BB%E3%82%93%E3%81%A8%E3%81%86%E3%81%AB%E3%81%AA%E3%81%8C%E3%81%84%E3%82%8F%E3%81%91%E3%81%AE%E3%82%8F%E3%81%8B%E3%82%89%E3%81%AA%E3%81%84%E3%81%A9%E3%82%81%E3%81%84%E3%82%93%E3%82%81%E3%81%84%E3%81%AE%E3%82%89%E3%81%B9%E3%82%8B%E3%81%BE%E3%81%A0%E3%81%AA%E3%81%8C%E3%81%8F%E3%81%97%E3%81%AA%E3%81%84%E3%81%A8%E3%81%9F%E3%82%8A%E3%81%AA%E3%81%84.w3.mag.keio.ac.jp/'",
"input": "<base href='http://%E3%81%BB%E3%82%93%E3%81%A8%E3%81%86%E3%81%AB%E3%81%AA%E3%81%8C%E3%81%84%E3%82%8F%E3%81%91%E3%81%AE%E3%82%8F%E3%81%8B%E3%82%89%E3%81%AA%E3%81%84%E3%81%A9%E3%82%81%E3%81%84%E3%82%93%E3%82%81%E3%81%84%E3%81%AE%E3%82%89%E3%81%B9%E3%82%8B%E3%81%BE%E3%81%A0%E3%81%AA%E3%81%8C%E3%81%8F%E3%81%97%E3%81%AA%E3%81%84%E3%81%A8%E3%81%9F%E3%82%8A%E3%81%AA%E3%81%84.%E3%81%BB%E3%82%93%E3%81%A8%E3%81%86%E3%81%AB%E3%81%AA%E3%81%8C%E3%81%84%E3%82%8F%E3%81%91%E3%81%AE%E3%82%8F%E3%81%8B%E3%82%89%E3%81%AA%E3%81%84%E3%81%A9%E3%82%81%E3%81%84%E3%82%93%E3%82%81%E3%81%84%E3%81%AE%E3%82%89%E3%81%B9%E3%82%8B%E3%81%BE%E3%81%A0%E3%81%AA%E3%81%8C%E3%81%8F%E3%81%97%E3%81%AA%E3%81%84%E3%81%A8%E3%81%9F%E3%82%8A%E3%81%AA%E3%81%84.%E3%81%BB%E3%82%93%E3%81%A8%E3%81%86%E3%81%AB%E3%81%AA%E3%81%8C%E3%81%84%E3%82%8F%E3%81%91%E3%81%AE%E3%82%8F%E3%81%8B%E3%82%89%E3%81%AA%E3%81%84%E3%81%A9%E3%82%81%E3%81%84%E3%82%93%E3%82%81%E3%81%84%E3%81%AE%E3%82%89%E3%81%B9%E3%82%8B%E3%81%BE%E3%81%A0%E3%81%AA%E3%81%8C%E3%81%8F%E3%81%97%E3%81%AA%E3%81%84%E3%81%A8%E3%81%9F%E3%82%8A%E3%81%AA%E3%81%84.w3.mag.keio.ac.jp/'",
"fail-if": "invalid-attribute-value"},
{"description": "base href contains valid URI 'http://xn--n8jaaaaai5bhf7as8fsfk3jnknefdde3fg11amb5gzdb4wi9bya3kc6lra.xn--n8jaaaaai5bhf7as8fsfk3jnknefdde3fg11amb5gzdb4wi9bya3kc6lra.xn--n8jaaaaai5bhf7as8fsfk3jnknefdde3fg11amb5gzdb4wi9bya3kc6lra.w3.mag.keio.ac.jp/'",
"input": "<base href='http://xn--n8jaaaaai5bhf7as8fsfk3jnknefdde3fg11amb5gzdb4wi9bya3kc6lra.xn--n8jaaaaai5bhf7as8fsfk3jnknefdde3fg11amb5gzdb4wi9bya3kc6lra.xn--n8jaaaaai5bhf7as8fsfk3jnknefdde3fg11amb5gzdb4wi9bya3kc6lra.w3.mag.keio.ac.jp/'",
"fail-if": "invalid-attribute-value"}
]}

View file

@ -0,0 +1,35 @@
{"tests": [
{"description": "valid base target attribute '_self'",
"input": "<base target=_self>",
"fail-if": "invalid-browsing-context"},
{"description": "valid base target attribute '_parent'",
"input": "<base target=_parent>",
"fail-if": "invalid-browsing-context"},
{"description": "valid base target attribute '_top'",
"input": "<base target=_top>",
"fail-if": "invalid-browsing-context"},
{"description": "valid base target attribute '_blank'",
"input": "<base target=_blank>",
"fail-if": "invalid-browsing-context"},
{"description": "valid base target attribute 'foo'",
"input": "<base target=foo>",
"fail-if": "invalid-browsing-context"},
{"description": "base target attribute may be blank",
"input": "<base target>",
"fail-if": "invalid-browsing-context"},
{"description": "invalid base target attribute '_'",
"input": "<base target=_>",
"fail-unless": "invalid-browsing-context"},
{"description": "invalid base target attribute '_foo'",
"input": "<base target=_foo>",
"fail-unless": "invalid-browsing-context"}
]}

View file

@ -0,0 +1,7 @@
{"tests": [
{"description": "blockquote cite contains invalid URI due to space in domain",
"input": "<blockquote cite='http://www.example. com/'",
"fail-unless": "invalid-uri-char"}
]}

View file

@ -17,67 +17,63 @@
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading tab",
"input": "<span class=' a'>",
"input": "<span class='\ta'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with trailing tab",
"input": "<span class='a '>",
"input": "<span class='a\t'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading and trailing tab",
"input": "<span class=' a '>",
"input": "<span class='\ta\t'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading LF",
"input": "<span class='
a'>",
"input": "<span class='\na'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with trailing LF",
"input": "<span class='a
'>",
"input": "<span class='a\n'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading and trailing LF",
"input": "<span class='
a
'>",
"input": "<span class='\na\n'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading LT",
"input": "<span class=' a'>",
"input": "<span class='\u000Ba'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with trailing LT",
"input": "<span class='a '>",
"input": "<span class='a\u000B'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading and trailing LT",
"input": "<span class=' a '>",
"input": "<span class='\u000Ba\u000B'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading FF",
"input": "<span class=' a'>",
"input": "<span class='\u000Ca'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with trailing FF",
"input": "<span class='a '>",
"input": "<span class='a\u000C'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading and trailing FF",
"input": "<span class=' a '>",
"input": "<span class='\u000Ca\u000C'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading CR",
"input": "<span class=' a'>",
"input": "<span class='\ra'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with trailing CR",
"input": "<span class='a '>",
"input": "<span class='a\r'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid single class attribute value with leading and trailing CR",
"input": "<span class=' a '>",
"input": "<span class='\ra\r'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid double class attribute value separated by space",
@ -85,24 +81,23 @@ a
"fail-if": "invalid-attribute-value"},
{"description": "valid double class attribute value separated by tab",
"input": "<span class='a b'>",
"input": "<span class='a\tb'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid double class attribute value separated by LF",
"input": "<span class='a
b'>",
"input": "<span class='a\nb'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid double class attribute value separated by LT",
"input": "<span class='a b'>",
"input": "<span class='a\u000Bb'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid double class attribute value separated by FF",
"input": "<span class='a b'>",
"input": "<span class='a\u000Cb'>",
"fail-if": "invalid-attribute-value"},
{"description": "valid double class attribute value separated by CR",
"input": "<span class='a b'>",
"input": "<span class='a\rb'>",
"fail-if": "invalid-attribute-value"},
{"description": "invalid duplicated class attribute value separated by space",
@ -110,24 +105,23 @@ b'>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid duplicated class attribute value separated by tab",
"input": "<span class='a a'>",
"input": "<span class='a\ta'>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid duplicated class attribute value separated by LF",
"input": "<span class='a
a'>",
"input": "<span class='a\na'>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid duplicated class attribute value separated by LT",
"input": "<span class='a a'>",
"input": "<span class='a\u000Ba'>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid duplicated class attribute value separated by FF",
"input": "<span class='a a'>",
"input": "<span class='a\u000Ca'>",
"fail-unless": "invalid-attribute-value"},
{"description": "invalid duplicated class attribute value separated by CR",
"input": "<span class='a a'>",
"input": "<span class='a\ra'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated class attribute value separated by space",
@ -135,24 +129,23 @@ a'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated class attribute value separated by tab",
"input": "<span class='a a'>",
"input": "<span class='a\ta'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated class attribute value separated by LF",
"input": "<span class='a
a'>",
"input": "<span class='a\na'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated class attribute value separated by LT",
"input": "<span class='a a'>",
"input": "<span class='a\u000Ba'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated class attribute value separated by FF",
"input": "<span class='a a'>",
"input": "<span class='a\u000Ca'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated class attribute value separated by CR",
"input": "<span class='a a'>",
"input": "<span class='a\ra'>",
"fail-unless": "duplicate-value-in-token-list"}
]}

View file

@ -34,26 +34,26 @@
{"description": "invalid contenteditable attribute value 'foo'",
"input": "<span contenteditable=foo>",
"fail-unless": "invalid-attribute-value"},
"fail-unless": "invalid-enumerated-value"},
{"description": "invalid contenteditable attribute value '0'",
"input": "<span contenteditable=0>",
"fail-unless": "invalid-attribute-value"},
"fail-unless": "invalid-enumerated-value"},
{"description": "invalid contenteditable attribute value '1'",
"input": "<span contenteditable=1>",
"fail-unless": "invalid-attribute-value"},
"fail-unless": "invalid-enumerated-value"},
{"description": "invalid contenteditable attribute value 'yes'",
"input": "<span contenteditable=yes>",
"fail-unless": "invalid-attribute-value"},
"fail-unless": "invalid-enumerated-value"},
{"description": "invalid contenteditable attribute value 'no'",
"input": "<span contenteditable=no>",
"fail-unless": "invalid-attribute-value"},
"fail-unless": "invalid-enumerated-value"},
{"description": "invalid contenteditable attribute value 'inherit'",
"input": "<span contenteditable=inherit>",
"fail-unless": "invalid-attribute-value"}
"fail-unless": "invalid-enumerated-value"}
]}

View file

@ -53,66 +53,63 @@
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading tab",
"input": "<span contextmenu=' a'>",
"input": "<span contextmenu='\ta'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing tab",
"input": "<span contextmenu='a '>",
"input": "<span contextmenu='a\t'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of tab in value",
"input": "<span contextmenu='a b'>",
"input": "<span contextmenu='a\tb'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading LF",
"input": "<span contextmenu='
a'>",
"input": "<span contextmenu='\na'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing LF",
"input": "<span contextmenu='a
'>",
"input": "<span contextmenu='a\n'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of LF in value",
"input": "<span contextmenu='a
b'>",
"input": "<span contextmenu='a\nb'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading LT",
"input": "<span contextmenu=' a'>",
"input": "<span contextmenu='\u000Ba'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing LT",
"input": "<span contextmenu='a '>",
"input": "<span contextmenu='a\u000B'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of LT in value",
"input": "<span contextmenu='a b'>",
"input": "<span contextmenu='a\u000Bb'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading FF",
"input": "<span contextmenu=' a'>",
"input": "<span contextmenu='\u000Ca'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing FF",
"input": "<span contextmenu='a '>",
"input": "<span contextmenu='a\u000C'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of FF in value",
"input": "<span contextmenu='a b'>",
"input": "<span contextmenu='a\u000Cb'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading CR",
"input": "<span contextmenu=' a'>",
"input": "<span contextmenu='\ra'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing CR",
"input": "<span contextmenu='a '>",
"input": "<span contextmenu='a\r'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of CR in value",
"input": "<span contextmenu='a b'>",
"input": "<span contextmenu='a\rb'>",
"fail-unless": "space-in-id"}
]}
]}

View file

@ -0,0 +1,59 @@
{"tests": [
{"description": "valid dir attribute value 'ltr'",
"input": "<span dir=ltr>",
"fail-if": "invalid-attribute-value"},
{"description": "valid dir attribute value 'LTR'",
"input": "<span dir=LTR>",
"fail-if": "invalid-attribute-value"},
{"description": "valid dir attribute value 'LtR'",
"input": "<span dir=LtR>",
"fail-if": "invalid-attribute-value"},
{"description": "valid dir attribute value 'rtl'",
"input": "<span dir=rtl>",
"fail-if": "invalid-attribute-value"},
{"description": "valid dir attribute value 'RTL'",
"input": "<span dir=RTL>",
"fail-if": "invalid-attribute-value"},
{"description": "valid dir attribute value 'RtL'",
"input": "<span dir=RtL>",
"fail-if": "invalid-attribute-value"},
{"description": "invalid dir attribute value due to leading space",
"input": "<span dir=' ltr'>",
"fail-unless": "invalid-enumerated-value"},
{"description": "dir attribute value can not be blank",
"input": "<span dir>",
"fail-unless": "attribute-value-can-not-be-blank"},
{"description": "dir attribute value can not be blank (with quotes)",
"input": "<span dir=''>",
"fail-unless": "attribute-value-can-not-be-blank"},
{"description": "invalid dir attribute value 'left'",
"input": "<span dir=left>",
"fail-unless": "invalid-enumerated-value"},
{"description": "invalid dir attribute value 'right'",
"input": "<span dir=right>",
"fail-unless": "invalid-enumerated-value"},
{"description": "invalid dir attribute value 'lefttoright'",
"input": "<span dir=lefttoright>",
"fail-unless": "invalid-enumerated-value"},
{"description": "invalid dir attribute value 'righttoleft'",
"input": "<span dir=righttoleft>",
"fail-unless": "invalid-enumerated-value"},
{"description": "invalid dir attribute value 'inherit'",
"input": "<span dir=inherit>",
"fail-unless": "invalid-enumerated-value"}
]}

View file

@ -0,0 +1,63 @@
{"tests": [
{"description": "valid draggable attribute value 'true'",
"input": "<span draggable=true>",
"fail-if": "invalid-attribute-value"},
{"description": "valid draggable attribute value 'TRUE'",
"input": "<span draggable=TRUE>",
"fail-if": "invalid-attribute-value"},
{"description": "valid draggable attribute value 'TrUe'",
"input": "<span draggable=TrUe>",
"fail-if": "invalid-attribute-value"},
{"description": "valid draggable attribute value 'false'",
"input": "<span draggable=false>",
"fail-if": "invalid-attribute-value"},
{"description": "valid draggable attribute value 'FALSE'",
"input": "<span draggable=FALSE>",
"fail-if": "invalid-attribute-value"},
{"description": "valid draggable attribute value 'FalSe'",
"input": "<span draggable=FalSe>",
"fail-if": "invalid-attribute-value"},
{"description": "invalid draggable attribute value ''",
"input": "<span draggable=''>",
"fail-unless": "attribute-value-can-not-be-blank"},
{"description": "invalid draggable attribute value (not specified)",
"input": "<span draggable>",
"fail-unless": "attribute-value-can-not-be-blank"},
{"description": "invalid draggable attribute value 'foo'",
"input": "<span draggable=foo>",
"fail-unless": "invalid-enumerated-value"},
{"description": "invalid draggable attribute value '0'",
"input": "<span draggable=0>",
"fail-unless": "invalid-enumerated-value"},
{"description": "invalid draggable attribute value '1'",
"input": "<span draggable=1>",
"fail-unless": "invalid-enumerated-value"},
{"description": "invalid draggable attribute value 'yes'",
"input": "<span draggable=yes>",
"fail-unless": "invalid-enumerated-value"},
{"description": "invalid draggable attribute value 'no'",
"input": "<span draggable=no>",
"fail-unless": "invalid-enumerated-value"},
{"description": "invalid draggable attribute value 'auto'",
"input": "<span draggable=auto>",
"fail-unless": "invalid-enumerated-value"},
{"description": "invalid draggable attribute value 'inherit'",
"input": "<span draggable=inherit>",
"fail-unless": "invalid-enumerated-value"}
]}

View file

@ -0,0 +1,23 @@
{"tests": [
{"description": "valid html xmlns attribute",
"input": "<html xmlns=http://www.w3.org/1999/xhtml>",
"fail-if": "invalid-root-namespace"},
{"description": "invalid html xmlns attribute due to leading space",
"input": "<html xmlns=' http://www.w3.org/1999/xhtml'>",
"fail-unless": "invalid-root-namespace"},
{"description": "invalid html xmlns attribute due to trailing space",
"input": "<html xmlns='http://www.w3.org/1999/xhtml '>",
"fail-unless": "invalid-root-namespace"},
{"description": "invalid html xmlns attribute due to uppercase",
"input": "<html xmlns=HTTP://WWW.W3.ORG/1999/XHTML>",
"fail-unless": "invalid-root-namespace"},
{"description": "invalid xmlns attribute on non-html element",
"input": "<body xmlns=http://www.w3.org/1999/xhtml>",
"fail-unless": "unknown-attribute"}
]}

View file

@ -33,66 +33,63 @@
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading tab",
"input": "<span id=' a'>",
"input": "<span id='\ta'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing tab",
"input": "<span id='a '>",
"input": "<span id='a\t'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of tab in value",
"input": "<span id='a b'>",
"input": "<span id='a\tb'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading LF",
"input": "<span id='
a'>",
"input": "<span id='\na'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing LF",
"input": "<span id='a
'>",
"input": "<span id='a\n'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of LF in value",
"input": "<span id='a
b'>",
"input": "<span id='a\nb'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading LT",
"input": "<span id=' a'>",
"input": "<span id='\u000Ba'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing LT",
"input": "<span id='a '>",
"input": "<span id='a\u000B'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of LT in value",
"input": "<span id='a b'>",
"input": "<span id='a\u000Bb'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading FF",
"input": "<span id=' a'>",
"input": "<span id='\u000Ca'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing FF",
"input": "<span id='a '>",
"input": "<span id='a\u000C'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of FF in value",
"input": "<span id='a b'>",
"input": "<span id='a\u000Cb'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of leading CR",
"input": "<span id=' a'>",
"input": "<span id='\ra'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of trailing CR",
"input": "<span id='a '>",
"input": "<span id='a\r'>",
"fail-unless": "space-in-id"},
{"description": "invalid ID because of CR in value",
"input": "<span id='a b'>",
"input": "<span id='a\rb'>",
"fail-unless": "space-in-id"},
{"description": "duplicate ID values",
@ -115,4 +112,4 @@ b'>",
"input": "<span id=a><span id=A>",
"fail-if": "duplicate-id"}
]}
]}

View file

@ -0,0 +1,63 @@
{"tests": [
{"description": "valid irrelevant attribute value 'irrelevant'",
"input": "<span irrelevant=irrelevant>",
"fail-if": "invalid-attribute-value"},
{"description": "valid irrelevant attribute value ''",
"input": "<span irrelevant=''>",
"fail-if": "invalid-attribute-value"},
{"description": "invalid irrelevant attribute value due to uppercase",
"input": "<span irrelevant=IRRELEVANT>",
"fail-unless": "invalid-boolean-value"},
{"description": "invalid irrelevant attribute value due to mixed case",
"input": "<span irrelevant=IrReLeVaNt>",
"fail-unless": "invalid-boolean-value"},
{"description": "invalid irrelevant attribute value due to leading space",
"input": "<span irrelevant=' irrelevant'>",
"fail-unless": "invalid-boolean-value"},
{"description": "invalid irrelevant attribute value due to trailing space",
"input": "<span irrelevant='irrelevant '>",
"fail-unless": "invalid-boolean-value"},
{"description": "invalid irrelevant attribute value 'foo'",
"input": "<span irrelevant=foo>",
"fail-unless": "invalid-boolean-value"},
{"description": "invalid irrelevant attribute value '0'",
"input": "<span irrelevant=0>",
"fail-unless": "invalid-boolean-value"},
{"description": "invalid irrelevant attribute value '1'",
"input": "<span irrelevant=1>",
"fail-unless": "invalid-boolean-value"},
{"description": "invalid irrelevant attribute value 'yes'",
"input": "<span irrelevant=yes>",
"fail-unless": "invalid-boolean-value"},
{"description": "invalid irrelevant attribute value 'no'",
"input": "<span irrelevant=no>",
"fail-unless": "invalid-boolean-value"},
{"description": "invalid irrelevant attribute value 'true'",
"input": "<span irrelevant=true>",
"fail-unless": "invalid-boolean-value"},
{"description": "invalid irrelevant attribute value 'false'",
"input": "<span irrelevant=false>",
"fail-unless": "invalid-boolean-value"},
{"description": "invalid irrelevant attribute value 'auto'",
"input": "<span irrelevant=auto>",
"fail-unless": "invalid-boolean-value"},
{"description": "invalid irrelevant attribute value 'inherit'",
"input": "<span irrelevant=inherit>",
"fail-unless": "invalid-boolean-value"}
]}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,7 @@
{"tests": [
{"description": "invalid li value attribute value due to leading junk",
"input": "<li value=a1>",
"fail-unless": "invalid-integer-value"}
]}

View file

@ -0,0 +1,7 @@
{"tests": [
{"description": "link href contains invalid URI due to space in domain",
"input": "<link href='http://www.example. com/'",
"fail-unless": "invalid-uri-char"}
]}

View file

@ -0,0 +1,7 @@
{"tests": [
{"description": "invalid link hreflang attribute value 'foo'",
"input": "<link hreflang=foo>",
"fail-unless": "invalid-lang-code"}
]}

View file

@ -0,0 +1,271 @@
{"tests": [
{"description": "invalid link rel value 'foo'",
"input": "<link rel=foo>",
"fail-unless": "invalid-rel"},
{"description": "valid link rel value 'alternate stylesheet'",
"input": "<link rel='alternate stylesheet'>",
"fail-if": "invalid-rel"},
{"description": "valid single link rel value with leading space",
"input": "<link rel=' stylesheet'>",
"fail-if": "invalid-rel"},
{"description": "valid single link rel value with trailing space",
"input": "<link rel='stylesheet '>",
"fail-if": "invalid-rel"},
{"description": "valid single link rel value with leading and trailing space",
"input": "<link rel=' stylesheet '>",
"fail-if": "invalid-rel"},
{"description": "valid single link rel value with leading tab",
"input": "<link rel='\tstylesheet'>",
"fail-if": "invalid-rel"},
{"description": "valid single link rel value with trailing tab",
"input": "<link rel='stylesheet\t'>",
"fail-if": "invalid-rel"},
{"description": "valid single link rel value with leading and trailing tab",
"input": "<link rel='\tstylesheet\t'>",
"fail-if": "invalid-rel"},
{"description": "valid single link rel value with leading LF",
"input": "<link rel='\nstylesheet'>",
"fail-if": "invalid-rel"},
{"description": "valid single link rel value with trailing LF",
"input": "<link rel='stylesheet\n'>",
"fail-if": "invalid-rel"},
{"description": "valid single link rel value with leading and trailing LF",
"input": "<link rel='\nstylesheet\n'>",
"fail-if": "invalid-rel"},
{"description": "valid single link rel value with leading LT",
"input": "<link rel='\u000Bstylesheet'>",
"fail-if": "invalid-rel"},
{"description": "valid single link rel value with trailing LT",
"input": "<link rel='stylesheet\u000B'>",
"fail-if": "invalid-rel"},
{"description": "valid single link rel value with leading and trailing LT",
"input": "<link rel='\u000Bstylesheet\u000B'>",
"fail-if": "invalid-rel"},
{"description": "valid single link rel value with leading FF",
"input": "<link rel='\u000Cstylesheet'>",
"fail-if": "invalid-rel"},
{"description": "valid single link rel value with trailing FF",
"input": "<link rel='stylesheet\u000C'>",
"fail-if": "invalid-rel"},
{"description": "valid single link rel value with leading and trailing FF",
"input": "<link rel='\u000Cstylesheet\u000C'>",
"fail-if": "invalid-rel"},
{"description": "valid single link rel value with leading CR",
"input": "<link rel='\rstylesheet'>",
"fail-if": "invalid-rel"},
{"description": "valid single link rel value with trailing CR",
"input": "<link rel='stylesheet\r'>",
"fail-if": "invalid-rel"},
{"description": "valid single link rel value with leading and trailing CR",
"input": "<link rel='\rstylesheet\r'>",
"fail-if": "invalid-rel"},
{"description": "valid double link rel value separated by space",
"input": "<link rel='stylesheet alternate'>",
"fail-if": "invalid-rel"},
{"description": "valid double link rel value separated by tab",
"input": "<link rel='stylesheet\talternate'>",
"fail-if": "invalid-rel"},
{"description": "valid double link rel value separated by LF",
"input": "<link rel='stylesheet\nalternate'>",
"fail-if": "invalid-rel"},
{"description": "valid double link rel value separated by LT",
"input": "<link rel='stylesheet\u000Balternate'>",
"fail-if": "invalid-rel"},
{"description": "valid double link rel value separated by FF",
"input": "<link rel='stylesheet\u000Calternate'>",
"fail-if": "invalid-rel"},
{"description": "valid double link rel value separated by CR",
"input": "<link rel='stylesheet\ralternate'>",
"fail-if": "invalid-rel"},
{"description": "invalid duplicated link rel value separated by space",
"input": "<link rel='stylesheet stylesheet'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated link rel value separated by tab",
"input": "<link rel='stylesheet\tstylesheet'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated link rel value separated by LF",
"input": "<link rel='stylesheet\nstylesheet'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated link rel value separated by LT",
"input": "<link rel='stylesheet\u000Bstylesheet'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated link rel value separated by FF",
"input": "<link rel='stylesheet\u000Cstylesheet'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated link rel value separated by CR",
"input": "<link rel='stylesheet\rstylesheet'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated link rel value separated by space",
"input": "<link rel='stylesheet stylesheet'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated link rel value separated by tab",
"input": "<link rel='stylesheet\tstylesheet'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated link rel value separated by LF",
"input": "<link rel='stylesheet\nstylesheet'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated link rel value separated by LT",
"input": "<link rel='stylesheet\u000Bstylesheet'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated link rel value separated by FF",
"input": "<link rel='stylesheet\u000Cstylesheet'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "invalid duplicated link rel value separated by CR",
"input": "<link rel='stylesheet\rstylesheet'>",
"fail-unless": "duplicate-value-in-token-list"},
{"description": "valid link rel value 'feed'",
"input": "<link rel=feed>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'help'",
"input": "<link rel=help>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'tag'",
"input": "<link rel=tag>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'sidebar'",
"input": "<link rel=sidebar>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'archive'",
"input": "<link rel=archive>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'contents'",
"input": "<link rel=contents>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'previous'",
"input": "<link rel=previous>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'index'",
"input": "<link rel=index>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'end'",
"input": "<link rel=end>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'copyright'",
"input": "<link rel=copyright>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'author'",
"input": "<link rel=author>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'top'",
"input": "<link rel=top>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'alternate'",
"input": "<link rel=alternate>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'next'",
"input": "<link rel=next>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'start'",
"input": "<link rel=start>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'stylesheet'",
"input": "<link rel=stylesheet>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'archives'",
"input": "<link rel=archives>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'toc'",
"input": "<link rel=toc>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'prev'",
"input": "<link rel=prev>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'begin'",
"input": "<link rel=begin>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'icon'",
"input": "<link rel=icon>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'search'",
"input": "<link rel=search>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'last'",
"input": "<link rel=last>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'license'",
"input": "<link rel=license>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'pingback'",
"input": "<link rel=pingback>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'up'",
"input": "<link rel=up>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'contact'",
"input": "<link rel=contact>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'prefetch'",
"input": "<link rel=prefetch>",
"fail-if": "invalid-rel"},
{"description": "valid link rel value 'first'",
"input": "<link rel=first>",
"fail-if": "invalid-rel"}
]}

View file

@ -0,0 +1,7 @@
{"tests": [
{"description": "invalid ol start attribute value due to leading junk",
"input": "<ol start=a1>",
"fail-unless": "invalid-integer-value"}
]}

View file

@ -0,0 +1,7 @@
{"tests": [
{"description": "invalid style scoped attribute value 'inherit'",
"input": "<style scoped=inherit>",
"fail-unless": "invalid-boolean-value"}
]}

View file

@ -0,0 +1,79 @@
{"tests": [
{"description": "valid tabindex attribute value '-1'",
"input": "<span tabindex=-1>",
"fail-if": "invalid-integer-value"},
{"description": "valid tabindex attribute value '0'",
"input": "<span tabindex=0>",
"fail-if": "invalid-integer-value"},
{"description": "valid tabindex attribute value '1'",
"input": "<span tabindex=1>",
"fail-if": "invalid-integer-value"},
{"description": "valid tabindex attribute value '32768'",
"input": "<span tabindex=32768>",
"fail-if": "invalid-integer-value"},
{"description": "valid tabindex attribute value '-32768'",
"input": "<span tabindex=-32768>",
"fail-if": "invalid-integer-value"},
{"description": "valid tabindex attribute value with leading spaces",
"input": "<span tabindex=' -32768'>",
"fail-if": "invalid-integer-value"},
{"description": "valid tabindex attribute value with trailing spaces",
"input": "<span tabindex='-32768 '>",
"fail-if": "invalid-integer-value"},
{"description": "valid tabindex attribute value with trailing junk",
"input": "<span tabindex='32768a'>",
"fail-if": "invalid-integer-value"},
{"description": "valid tabindex attribute value with trailing junk and whitespace",
"input": "<span tabindex='32768a '>",
"fail-if": "invalid-integer-value"},
{"description": "valid tabindex attribute value with trailing whitespace and junk",
"input": "<span tabindex='32768 a'>",
"fail-if": "invalid-integer-value"},
{"description": "valid tabindex attribute value with leading spaces",
"input": "<span tabindex=' 32768'>",
"fail-if": "invalid-integer-value"},
{"description": "valid tabindex attribute value with leading spaces (with sign)",
"input": "<span tabindex=' -32768'>",
"fail-if": "invalid-integer-value"},
{"description": "invalid tabindex attribute value (blank)",
"input": "<span tabindex>",
"fail-unless": "attribute-value-can-not-be-blank"},
{"description": "invalid tabindex attribute value due to leading junk",
"input": "<span tabindex=a1>",
"fail-unless": "invalid-integer-value"},
{"description": "invalid tabindex attribute value due to two hyphens",
"input": "<span tabindex=--1>",
"fail-unless": "invalid-integer-value"},
{"description": "invalid tabindex attribute value due to non-numeric",
"input": "<span tabindex=foo>",
"fail-unless": "invalid-integer-value"},
{"description": "invalid tabindex attribute value due to positive sign",
"input": "<span tabindex=+1>",
"fail-unless": "invalid-integer-value"},
{"description": "invalid tabindex attribute value due to decimal point",
"input": "<span tabindex=.1>",
"fail-unless": "invalid-integer-value"},
{"description": "valid tabindex attribute value with trailing decimal point",
"input": "<span tabindex=1.0>",
"fail-if": "invalid-integer-value"}
]}

View file

@ -68,3 +68,21 @@ module HTML5
end
end
end
# Extends String#% so that message templates may use named placeholders of
# the form "%(key)" when the right-hand operand is a Hash. Any other operand
# is handed to the built-in formatter unchanged.
class String
  # Keep a handle on the built-in formatter. The guard stops a second load of
  # this file from aliasing the patched method onto itself, which would make
  # the non-Hash branch recurse forever.
  unless method_defined?(:old_format)
    alias_method :old_format, :%
  end

  # Formats the receiver with +data+.
  #
  # Hash operand: returns a copy of the receiver in which every literal
  # "%(key)" is replaced by the corresponding value converted with +to_s+.
  # Placeholders without a matching key are left untouched. Anything that
  # follows the closing parenthesis (for example a width/type suffix) is
  # not interpreted and stays in the output.
  #
  # Any other operand: delegates to the built-in String#% with $VERBOSE
  # switched off for the duration of the call, then puts $VERBOSE back to
  # whatever it was before, even if formatting raises.
  define_method("%") do |data|
    if data.kind_of?(Hash)
      data.inject(clone) do |text, (key, value)|
        # A plain-string pattern matches the key literally (no regex
        # metacharacters), and the block form inserts the value verbatim
        # (no back-reference expansion of backslashes).
        text.gsub("%(#{key})") { value.to_s }
      end
    else
      saved_verbose = $VERBOSE
      begin
        $VERBOSE = false
        old_format(data)
      ensure
        $VERBOSE = saved_verbose
      end
    end
  end
end

View file

@ -10,7 +10,7 @@ class Html5EncodingTestCase < Test::Unit::TestCase
require 'rubygems'
require 'UniversalDetector'
def test_chardet
def test_chardet #TODO: can we get rid of this?
file = File.open(File.join(TESTDATA_DIR, 'encoding', 'chardet', 'test_big5.txt'), 'r')
stream = HTML5::HTMLInputStream.new(file, :chardet => true)
assert_equal 'big5', stream.char_encoding.downcase

View file

@ -50,8 +50,8 @@ class Html5ParserTestCase < Test::Unit::TestCase
'', 'Recieved:', actual_output
].join("\n")
actual_errors = parser.errors.map do |(line, col), message|
'Line: %i Col: %i %s' % [line, col, message]
actual_errors = parser.errors.map do |(line, col), message, datavars|
'Line: %i Col: %i %s' % [line, col, E[message] % datavars]
end
assert_equal errors.length, parser.errors.length, [
'', 'Input', input,

View file

@ -0,0 +1,31 @@
#!/usr/bin/env ruby -wKU
require File.join(File.dirname(__FILE__), 'preamble')
require 'html5'
require 'html5/filters/validator'
# Data-driven tests for the conformance checker. One test method is generated
# for every entry in every fixture file returned by
# html5_test_files('validator').
class TestValidator < Test::Unit::TestCase
  # Parses test['input'] with the conformance-checking tokenizer and compares
  # the reported error codes against the fixture's expectations.
  #
  # test - Hash loaded from a fixture file. Keys read here:
  #        'input'       - markup to parse
  #        'fail-if'     - optional error code that must NOT be reported
  #        'fail-unless' - optional error code that MUST be reported
  #        'description' - used only in failure messages
  def run_validator_test(test)
    parser = HTML5::HTMLParser.new(:tokenizer => HTMLConformanceChecker)
    parser.parse(test['input'])
    # The error code is the second element of each parser error entry.
    error_codes = parser.errors.collect { |error| error[1] }

    if test.has_key?('fail-if')
      assert !error_codes.include?(test['fail-if']),
        "#{test['description']}: unexpected error #{test['fail-if'].inspect} " +
        "for input #{test['input'].inspect}"
    end
    if test.has_key?('fail-unless')
      assert error_codes.include?(test['fail-unless']),
        "#{test['description']}: expected error #{test['fail-unless'].inspect} " +
        "for input #{test['input'].inspect}, got #{error_codes.inspect}"
    end
  end

  html5_test_files('validator').each do |filename|
    # Block form closes the fixture file as soon as it has been parsed.
    tests = File.open(filename) { |file| JSON.load(file) }
    test_name = File.basename(filename).sub(".test", "")
    tests['tests'].each_with_index do |test, index|
      define_method "test_#{test_name}_#{index}" do
        run_validator_test(test)
      end
    end
  end
end