diff --git a/vendor/plugins/HTML5lib/bin/html5 b/vendor/plugins/HTML5lib/bin/html5
index 2680aea3..bc0514ad 100755
--- a/vendor/plugins/HTML5lib/bin/html5
+++ b/vendor/plugins/HTML5lib/bin/html5
@@ -81,8 +81,8 @@ def print_output(parser, document, opts)
if opts.error
errList=[]
- for pos, message in parser.errors
- errList << ("Line %i Col %i"%pos + " " + message)
+ for pos, errorcode, datavars in parser.errors
+ errList << "Line %i Col %i"%pos + " " + constants.E.get(errorcode, 'Unknown error "%s"' % errorcode) % datavars
end
$stdout.write("\nParse errors:\n" + errList.join("\n")+"\n")
end
diff --git a/vendor/plugins/HTML5lib/lib/html5/constants.rb b/vendor/plugins/HTML5lib/lib/html5/constants.rb
index 8ccaf66d..9a4580fa 100755
--- a/vendor/plugins/HTML5lib/lib/html5/constants.rb
+++ b/vendor/plugins/HTML5lib/lib/html5/constants.rb
@@ -2,6 +2,8 @@ module HTML5
class EOF < Exception; end
+ def self._(str); str end # Returns +str+ unchanged; wraps each message literal in E (presumably a gettext-style translation hook -- TODO confirm).
+
CONTENT_MODEL_FLAGS = [
:PCDATA,
:RCDATA,
@@ -815,4 +817,228 @@ module HTML5
hz-gb-2312
]
+ E = {
+ "null-character" =>
+ _("Null character in input stream, replaced with U+FFFD."),
+ "incorrectly-placed-solidus" =>
+ _("Solidus (/) incorrectly placed in tag."),
+ "incorrect-cr-newline-entity" =>
+ _("Incorrect CR newline entity, replaced with LF."),
+ "illegal-windows-1252-entity" =>
+ _("Entity used with illegal number (windows-1252 reference)."),
+ "cant-convert-numeric-entity" =>
+ _("Numeric entity couldn't be converted to character " +
+ "(codepoint U+%(charAsInt)08x)."),
+ "illegal-codepoint-for-numeric-entity" =>
+ _("Numeric entity represents an illegal codepoint=> " +
+ "U+%(charAsInt)08x."),
+ "numeric-entity-without-semicolon" =>
+ _("Numeric entity didn't end with ';'."),
+ "expected-numeric-entity-but-got-eof" =>
+ _("Numeric entity expected. Got end of file instead."),
+ "expected-numeric-entity" =>
+ _("Numeric entity expected but none found."),
+ "named-entity-without-semicolon" =>
+ _("Named entity didn't end with ';'."),
+ "expected-named-entity" =>
+ _("Named entity expected. Got none."),
+ "attributes-in-end-tag" =>
+ _("End tag contains unexpected attributes."),
+ "expected-tag-name-but-got-right-bracket" =>
+ _("Expected tag name. Got '>' instead."),
+ "expected-tag-name-but-got-question-mark" =>
+ _("Expected tag name. Got '?' instead. (HTML doesn't " +
+ "support processing instructions.)"),
+ "expected-tag-name" =>
+ _("Expected tag name. Got something else instead"),
+ "expected-closing-tag-but-got-right-bracket" =>
+ _("Expected closing tag. Got '>' instead. Ignoring '>'."),
+ "expected-closing-tag-but-got-eof" =>
+ _("Expected closing tag. Unexpected end of file."),
+ "expected-closing-tag-but-got-char" =>
+ _("Expected closing tag. Unexpected character '%(data)' found."),
+ "eof-in-tag-name" =>
+ _("Unexpected end of file in the tag name."),
+ "expected-attribute-name-but-got-eof" =>
+ _("Unexpected end of file. Expected attribute name instead."),
+ "eof-in-attribute-name" =>
+ _("Unexpected end of file in attribute name."),
+ "duplicate-attribute" =>
+ _("Dropped duplicate attribute on tag."),
+ "expected-end-of-tag-name-but-got-eof" =>
+ _("Unexpected end of file. Expected = or end of tag."),
+ "expected-attribute-value-but-got-eof" =>
+ _("Unexpected end of file. Expected attribute value."),
+ "eof-in-attribute-value-double-quote" =>
+ _("Unexpected end of file in attribute value (\")."),
+ "eof-in-attribute-value-single-quote" =>
+ _("Unexpected end of file in attribute value (')."),
+ "eof-in-attribute-value-no-quotes" =>
+ _("Unexpected end of file in attribute value."),
+ "expected-dashes-or-doctype" =>
+ _("Expected '--' or 'DOCTYPE'. Not found."),
+ "incorrect-comment" =>
+ _("Incorrect comment."),
+ "eof-in-comment" =>
+ _("Unexpected end of file in comment."),
+ "eof-in-comment-end-dash" =>
+ _("Unexpected end of file in comment (-)"),
+ "unexpected-dash-after-double-dash-in-comment" =>
+ _("Unexpected '-' after '--' found in comment."),
+ "eof-in-comment-double-dash" =>
+ _("Unexpected end of file in comment (--)."),
+ "unexpected-char-in-comment" =>
+ _("Unexpected character in comment found."),
+ "need-space-after-doctype" =>
+ _("No space after literal string 'DOCTYPE'."),
+ "expected-doctype-name-but-got-right-bracket" =>
+ _("Unexpected > character. Expected DOCTYPE name."),
+ "expected-doctype-name-but-got-eof" =>
+ _("Unexpected end of file. Expected DOCTYPE name."),
+ "eof-in-doctype-name" =>
+ _("Unexpected end of file in DOCTYPE name."),
+ "eof-in-doctype" =>
+ _("Unexpected end of file in DOCTYPE."),
+ "expected-space-or-right-bracket-in-doctype" =>
+ _("Expected space or '>'. Got '%(data)'"),
+ "unexpected-end-of-doctype" =>
+ _("Unexpected end of DOCTYPE."),
+ "unexpected-char-in-doctype" =>
+ _("Unexpected character in DOCTYPE."),
+ "eof-in-bogus-doctype" =>
+ _("Unexpected end of file in bogus doctype."),
+ "eof-in-innerhtml" =>
+ _("XXX innerHTML EOF"),
+ "unexpected-doctype" =>
+ _("Unexpected DOCTYPE. Ignored."),
+ "non-html-root" =>
+ _("html needs to be the first start tag."),
+ "expected-doctype-but-got-eof" =>
+ _("Unexpected End of file. Expected DOCTYPE."),
+ "unknown-doctype" =>
+ _("Erroneous DOCTYPE."),
+ "expected-doctype-but-got-chars" =>
+ _("Unexpected non-space characters. Expected DOCTYPE."),
+ "expected-doctype-but-got-start-tag" =>
+ _("Unexpected start tag (%(name)). Expected DOCTYPE."),
+ "expected-doctype-but-got-end-tag" =>
+ _("Unexpected end tag (%(name)). Expected DOCTYPE."),
+ "end-tag-after-implied-root" =>
+ _("Unexpected end tag (%(name)) after the (implied) root element."),
+ "expected-named-closing-tag-but-got-eof" =>
+ _("Unexpected end of file. Expected end tag (%(name))."),
+ "two-heads-are-not-better-than-one" =>
+ _("Unexpected start tag head in existing head. Ignored."),
+ "unexpected-end-tag" =>
+ _("Unexpected end tag (%(name)). Ignored."),
+ "unexpected-start-tag-out-of-my-head" =>
+ _("Unexpected start tag (%(name)) that can be in head. Moved."),
+ "unexpected-start-tag" =>
+ _("Unexpected start tag (%(name))."),
+ "missing-end-tag" =>
+ _("Missing end tag (%(name))."),
+ "missing-end-tags" =>
+ _("Missing end tags (%(name))."),
+ "unexpected-start-tag-implies-end-tag" =>
+ _("Unexpected start tag (%(startName)) " +
+ "implies end tag (%(endName))."),
+ "unexpected-start-tag-treated-as" =>
+ _("Unexpected start tag (%(originalName)). Treated as %(newName)."),
+ "deprecated-tag" =>
+ _("Unexpected start tag %(name). Don't use it!"),
+ "unexpected-start-tag-ignored" =>
+ _("Unexpected start tag %(name). Ignored."),
+ "expected-one-end-tag-but-got-another" =>
+ _("Unexpected end tag (%(gotName)). " +
+ "Missing end tag (%(expectedName))."),
+ "end-tag-too-early" =>
+ _("End tag (%(name)) seen too early. Expected other end tag."),
+ "end-tag-too-early-named" =>
+ _("Unexpected end tag (%(gotName)). Expected end tag (%(expectedName))."),
+ "end-tag-too-early-ignored" =>
+ _("End tag (%(name)) seen too early. Ignored."),
+ "adoption-agency-1.1" =>
+ _("End tag (%(name)) violates step 1, " +
+ "paragraph 1 of the adoption agency algorithm."),
+ "adoption-agency-1.2" =>
+ _("End tag (%(name)) violates step 1, " +
+ "paragraph 2 of the adoption agency algorithm."),
+ "adoption-agency-1.3" =>
+ _("End tag (%(name)) violates step 1, " +
+ "paragraph 3 of the adoption agency algorithm."),
+ "unexpected-end-tag-treated-as" =>
+ _("Unexpected end tag (%(originalName)). Treated as %(newName)."),
+ "no-end-tag" =>
+ _("This element (%(name)) has no end tag."),
+ "unexpected-implied-end-tag-in-table" =>
+ _("Unexpected implied end tag (%(name)) in the table phase."),
+ "unexpected-implied-end-tag-in-table-body" =>
+ _("Unexpected implied end tag (%(name)) in the table body phase."),
+ "unexpected-char-implies-table-voodoo" =>
+ _("Unexpected non-space characters in " +
+ "table context caused voodoo mode."),
+ "unexpected-start-tag-implies-table-voodoo" =>
+ _("Unexpected start tag (%(name)) in " +
+ "table context caused voodoo mode."),
+ "unexpected-end-tag-implies-table-voodoo" =>
+ _("Unexpected end tag (%(name)) in " +
+ "table context caused voodoo mode."),
+ "unexpected-cell-in-table-body" =>
+ _("Unexpected table cell start tag (%(name)) " +
+ "in the table body phase."),
+ "unexpected-cell-end-tag" =>
+ _("Got table cell end tag (%(name)) " +
+ "while required end tags are missing."),
+ "unexpected-end-tag-in-table-body" =>
+ _("Unexpected end tag (%(name)) in the table body phase. Ignored."),
+ "unexpected-implied-end-tag-in-table-row" =>
+ _("Unexpected implied end tag (%(name)) in the table row phase."),
+ "unexpected-end-tag-in-table-row" =>
+ _("Unexpected end tag (%(name)) in the table row phase. Ignored."),
+ "unexpected-select-in-select" =>
+ _("Unexpected select start tag in the select phase " +
+ "implies select start tag."),
+ "unexpected-start-tag-in-select" =>
+ _("Unexpected start tag token (%(name) in the select phase. " +
+ "Ignored."),
+ "unexpected-end-tag-in-select" =>
+ _("Unexpected end tag (%(name)) in the select phase. Ignored."),
+ "unexpected-char-after-body" =>
+ _("Unexpected non-space characters in the after body phase."),
+ "unexpected-start-tag-after-body" =>
+ _("Unexpected start tag token (%(name))" +
+ " in the after body phase."),
+ "unexpected-end-tag-after-body" =>
+ _("Unexpected end tag token (%(name))" +
+ " in the after body phase."),
+ "unexpected-char-in-frameset" =>
+ _("Unepxected characters in the frameset phase. Characters ignored."),
+ "unexpected-start-tag-in-frameset" =>
+ _("Unexpected start tag token (%(name))" +
+ " in the frameset phase. Ignored."),
+ "unexpected-frameset-in-frameset-innerhtml" =>
+ _("Unexpected end tag token (frameset) " +
+ "in the frameset phase (innerHTML)."),
+ "unexpected-end-tag-in-frameset" =>
+ _("Unexpected end tag token (%(name))" +
+ " in the frameset phase. Ignored."),
+ "unexpected-char-after-frameset" =>
+ _("Unexpected non-space characters in the " +
+ "after frameset phase. Ignored."),
+ "unexpected-start-tag-after-frameset" =>
+ _("Unexpected start tag (%(name))" +
+ " in the after frameset phase. Ignored."),
+ "unexpected-end-tag-after-frameset" =>
+ _("Unexpected end tag (%(name))" +
+ " in the after frameset phase. Ignored."),
+ "expected-eof-but-got-char" =>
+ _("Unexpected non-space characters. Expected end of file."),
+ "expected-eof-but-got-start-tag" =>
+ _("Unexpected start tag (%(name))" +
+ ". Expected end of file."),
+ "expected-eof-but-got-end-tag" =>
+ _("Unexpected end tag (%(name))" +
+ ". Expected end of file."),
+ }
+
end
diff --git a/vendor/plugins/HTML5lib/lib/html5/filters/iso639codes.rb b/vendor/plugins/HTML5lib/lib/html5/filters/iso639codes.rb
new file mode 100755
index 00000000..ce3c9623
--- /dev/null
+++ b/vendor/plugins/HTML5lib/lib/html5/filters/iso639codes.rb
@@ -0,0 +1,752 @@
+# borrowed from feedvalidator, original copyright license is
+#
+# Copyright (c) 2002-2006, Sam Ruby, Mark Pilgrim, Joseph Walton, and Phil Ringnalda
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+ISO_LANG = { # Language code => English language name (ISO 639 codes, per this file's name).
+ 'aa' => 'Afar',
+ 'ab' => 'Abkhazian',
+ 'ae' => 'Avestan',
+ 'af' => 'Afrikaans',
+ 'ak' => 'Akan',
+ 'am' => 'Amharic',
+ 'an' => 'Aragonese',
+ 'ar' => 'Arabic',
+ 'as' => 'Assamese',
+ 'av' => 'Avaric',
+ 'ay' => 'Aymara',
+ 'az' => 'Azerbaijani',
+ 'ba' => 'Bashkir',
+ 'be' => 'Byelorussian',
+ 'bg' => 'Bulgarian',
+ 'bh' => 'Bihari',
+ 'bi' => 'Bislama',
+ 'bm' => 'Bambara',
+ 'bn' => 'Bengali;Bangla',
+ 'bo' => 'Tibetan',
+ 'br' => 'Breton',
+ 'bs' => 'Bosnian',
+ 'ca' => 'Catalan',
+ 'ce' => 'Chechen',
+ 'ch' => 'Chamorro',
+ 'co' => 'Corsican',
+ 'cr' => 'Cree',
+ 'cs' => 'Czech',
+ 'cu' => 'Church Slavic',
+ 'cv' => 'Chuvash',
+ 'cy' => 'Welsh',
+ 'da' => 'Danish',
+ 'de' => 'German',
+ 'dv' => 'Divehi',
+ 'dz' => 'Dzongkha',
+ 'ee' => 'Ewe',
+ 'el' => 'Greek',
+ 'en' => 'English',
+ 'eo' => 'Esperanto',
+ 'es' => 'Spanish',
+ 'et' => 'Estonian',
+ 'eu' => 'Basque',
+ 'fa' => 'Persian (Farsi)',
+ 'ff' => 'Fulah',
+ 'fi' => 'Finnish',
+ 'fj' => 'Fiji',
+ 'fo' => 'Faroese',
+ 'fr' => 'French',
+ 'fy' => 'Frisian, Western',
+ 'ga' => 'Irish',
+ 'gd' => 'Scots Gaelic',
+ 'gl' => 'Galician',
+ 'gn' => 'Guarani',
+ 'gu' => 'Gujarati',
+ 'gv' => 'Manx',
+ 'ha' => 'Hausa',
+ 'he' => 'Hebrew',
+ 'hi' => 'Hindi',
+ 'ho' => 'Hiri Motu',
+ 'hr' => 'Croatian',
+ 'ht' => 'Haitian',
+ 'hu' => 'Hungarian',
+ 'hy' => 'Armenian',
+ 'hz' => 'Herero',
+ 'ia' => 'Interlingua',
+ 'id' => 'Indonesian',
+ 'ie' => 'Interlingue',
+ 'ig' => 'Igbo',
+ 'ii' => 'Sichuan Yi',
+ 'ik' => 'Inupiak',
+ 'io' => 'Ido',
+ 'is' => 'Icelandic',
+ 'it' => 'Italian',
+ 'iu' => 'Inuktitut',
+ 'ja' => 'Japanese',
+ 'jv' => 'Javanese',
+ 'ka' => 'Georgian',
+ 'kg' => 'Kongo',
+ 'ki' => 'Kikuyu; Gikuyu',
+ 'kj' => 'Kuanyama; Kwanyama',
+ 'kk' => 'Kazakh',
+ 'kl' => 'Greenlandic',
+ 'km' => 'Cambodian',
+ 'kn' => 'Kannada',
+ 'ko' => 'Korean',
+ 'kr' => 'Kanuri',
+ 'ks' => 'Kashmiri',
+ 'ku' => 'Kurdish',
+ 'kv' => 'Komi',
+ 'kw' => 'Cornish',
+ 'ky' => 'Kirghiz',
+ 'la' => 'Latin',
+ 'lb' => 'Letzeburgesch; Luxembourgish',
+ 'lg' => 'Ganda',
+ 'li' => 'Limburgan; Limburger, Limburgish',
+ 'ln' => 'Lingala',
+ 'lo' => 'Lao',
+ 'lt' => 'Lithuanian',
+ 'lu' => 'Luba-Katanga',
+ 'lv' => 'Latvian',
+ 'mg' => 'Malagasy',
+ 'mh' => 'Marshallese',
+ 'mi' => 'Maori',
+ 'mk' => 'Macedonian',
+ 'ml' => 'Malayalam',
+ 'mn' => 'Mongolian',
+ 'mo' => 'Moldavian',
+ 'mr' => 'Marathi',
+ 'ms' => 'Malay',
+ 'mt' => 'Maltese',
+ 'my' => 'Burmese',
+ 'na' => 'Nauru',
+ 'nb' => 'Norwegian Bokmal',
+ 'nd' => 'Ndebele, North',
+ 'ne' => 'Nepali',
+ 'ng' => 'Ndonga',
+ 'nl' => 'Dutch',
+ 'nn' => 'Norwegian Nynorsk',
+ 'no' => 'Norwegian',
+ 'nr' => 'Ndebele, South',
+ 'nv' => 'Navaho; Navajo',
+ 'ny' => 'Chewa; Chichewa; Nyanha',
+ 'oc' => 'Occitan',
+ 'oj' => 'Ojibwa',
+ 'om' => 'Afan (Oromo)',
+ 'or' => 'Oriya',
+ 'os' => 'Ossetian; Ossetic',
+ 'pa' => 'Punjabi',
+ 'pi' => 'Pali',
+ 'pl' => 'Polish',
+ 'ps' => 'Pushto',
+ 'pt' => 'Portuguese',
+ 'qu' => 'Quechua',
+ 'rm' => 'Rhaeto-Romance',
+ 'rn' => 'Kurundi',
+ 'ro' => 'Romanian',
+ 'ru' => 'Russian',
+ 'rw' => 'Kinyarwanda',
+ 'sa' => 'Sanskrit',
+ 'sc' => 'Sardinian',
+ 'sd' => 'Sindhi',
+ 'se' => 'Northern Sami',
+ 'sg' => 'Sangho',
+ 'sh' => 'Serbo-Croatian',
+ 'si' => 'Singhalese',
+ 'sk' => 'Slovak',
+ 'sl' => 'Slovenian',
+ 'sm' => 'Samoan',
+ 'sn' => 'Shona',
+ 'so' => 'Somali',
+ 'sq' => 'Albanian',
+ 'sr' => 'Serbian',
+ 'ss' => 'Swati',
+ 'st' => 'Sotho, Southern',
+ 'su' => 'Sundanese',
+ 'sv' => 'Swedish',
+ 'sw' => 'Swahili',
+ 'ta' => 'Tamil',
+ 'te' => 'Telugu',
+ 'tg' => 'Tajik',
+ 'th' => 'Thai',
+ 'ti' => 'Tigrinya',
+ 'tk' => 'Turkmen',
+ 'tl' => 'Tagalog',
+ 'tn' => 'Tswana',
+ 'to' => 'Tonga',
+ 'tr' => 'Turkish',
+ 'ts' => 'Tsonga',
+ 'tt' => 'Tatar',
+ 'tw' => 'Twi',
+ 'ty' => 'Tahitian',
+ 'ug' => 'Uigur',
+ 'uk' => 'Ukrainian',
+ 'ur' => 'Urdu',
+ 'uz' => 'Uzbek',
+ 've' => 'Venda',
+ 'vi' => 'Vietnamese',
+ 'vo' => 'Volapuk',
+ 'wa' => 'Walloon',
+ 'wo' => 'Wolof',
+ 'xh' => 'Xhosa',
+ 'yi' => 'Yiddish',
+ 'yo' => 'Yoruba',
+ 'za' => 'Zhuang',
+ 'zh' => 'Chinese',
+ 'zu' => 'Zulu',
+ 'x' => 'a user-defined language',
+ 'xx' => 'a user-defined language',
+ # Three-letter codes follow. Some keys below appear more than once (e.g. 'ady', 'nya'); in a Ruby hash literal the last entry for a key wins.
+ 'abk' => 'Abkhazian',
+ 'ace' => 'Achinese',
+ 'ach' => 'Acoli',
+ 'ada' => 'Adangme',
+ 'ady' => 'Adygei',
+ 'ady' => 'Adyghe',
+ 'aar' => 'Afar',
+ 'afh' => 'Afrihili',
+ 'afr' => 'Afrikaans',
+ 'afa' => 'Afro-Asiatic (Other)',
+ 'ain' => 'Ainu',
+ 'aka' => 'Akan',
+ 'akk' => 'Akkadian',
+ 'alb' => 'Albanian',
+ 'sqi' => 'Albanian',
+ 'gws' => 'Alemanic',
+ 'ale' => 'Aleut',
+ 'alg' => 'Algonquian languages',
+ 'tut' => 'Altaic (Other)',
+ 'amh' => 'Amharic',
+ 'anp' => 'Angika',
+ 'apa' => 'Apache languages',
+ 'ara' => 'Arabic',
+ 'arg' => 'Aragonese',
+ 'arc' => 'Aramaic',
+ 'arp' => 'Arapaho',
+ 'arn' => 'Araucanian',
+ 'arw' => 'Arawak',
+ 'arm' => 'Armenian',
+ 'hye' => 'Armenian',
+ 'rup' => 'Aromanian',
+ 'art' => 'Artificial (Other)',
+ 'asm' => 'Assamese',
+ 'ast' => 'Asturian',
+ 'ath' => 'Athapascan languages',
+ 'aus' => 'Australian languages',
+ 'map' => 'Austronesian (Other)',
+ 'ava' => 'Avaric',
+ 'ave' => 'Avestan',
+ 'awa' => 'Awadhi',
+ 'aym' => 'Aymara',
+ 'aze' => 'Azerbaijani',
+ 'ast' => 'Bable',
+ 'ban' => 'Balinese',
+ 'bat' => 'Baltic (Other)',
+ 'bal' => 'Baluchi',
+ 'bam' => 'Bambara',
+ 'bai' => 'Bamileke languages',
+ 'bad' => 'Banda',
+ 'bnt' => 'Bantu (Other)',
+ 'bas' => 'Basa',
+ 'bak' => 'Bashkir',
+ 'baq' => 'Basque',
+ 'eus' => 'Basque',
+ 'btk' => 'Batak (Indonesia)',
+ 'bej' => 'Beja',
+ 'bel' => 'Belarusian',
+ 'bem' => 'Bemba',
+ 'ben' => 'Bengali',
+ 'ber' => 'Berber (Other)',
+ 'bho' => 'Bhojpuri',
+ 'bih' => 'Bihari',
+ 'bik' => 'Bikol',
+ 'byn' => 'Bilin',
+ 'bin' => 'Bini',
+ 'bis' => 'Bislama',
+ 'byn' => 'Blin',
+ 'nob' => 'Bokmal, Norwegian',
+ 'bos' => 'Bosnian',
+ 'bra' => 'Braj',
+ 'bre' => 'Breton',
+ 'bug' => 'Buginese',
+ 'bul' => 'Bulgarian',
+ 'bua' => 'Buriat',
+ 'bur' => 'Burmese',
+ 'mya' => 'Burmese',
+ 'cad' => 'Caddo',
+ 'car' => 'Carib',
+ 'spa' => 'Castilian',
+ 'cat' => 'Catalan',
+ 'cau' => 'Caucasian (Other)',
+ 'ceb' => 'Cebuano',
+ 'cel' => 'Celtic (Other)',
+ 'cai' => 'Central American Indian (Other)',
+ 'chg' => 'Chagatai',
+ 'cmc' => 'Chamic languages',
+ 'cha' => 'Chamorro',
+ 'che' => 'Chechen',
+ 'chr' => 'Cherokee',
+ 'nya' => 'Chewa',
+ 'chy' => 'Cheyenne',
+ 'chb' => 'Chibcha',
+ 'nya' => 'Chichewa',
+ 'chi' => 'Chinese',
+ 'zho' => 'Chinese',
+ 'chn' => 'Chinook jargon',
+ 'chp' => 'Chipewyan',
+ 'cho' => 'Choctaw',
+ 'zha' => 'Chuang',
+ 'chu' => 'Church Slavic; Church Slavonic; Old Church Slavonic; Old Church Slavic; Old Bulgarian',
+ 'chk' => 'Chuukese',
+ 'chv' => 'Chuvash',
+ 'nwc' => 'Classical Nepal Bhasa; Classical Newari; Old Newari',
+ 'cop' => 'Coptic',
+ 'cor' => 'Cornish',
+ 'cos' => 'Corsican',
+ 'cre' => 'Cree',
+ 'mus' => 'Creek',
+ 'crp' => 'Creoles and pidgins(Other)',
+ 'cpe' => 'Creoles and pidgins, English-based (Other)',
+ 'cpf' => 'Creoles and pidgins, French-based (Other)',
+ 'cpp' => 'Creoles and pidgins, Portuguese-based (Other)',
+ 'crh' => 'Crimean Tatar; Crimean Turkish',
+ 'scr' => 'Croatian',
+ 'hrv' => 'Croatian',
+ 'cus' => 'Cushitic (Other)',
+ 'cze' => 'Czech',
+ 'ces' => 'Czech',
+ 'dak' => 'Dakota',
+ 'dan' => 'Danish',
+ 'dar' => 'Dargwa',
+ 'day' => 'Dayak',
+ 'del' => 'Delaware',
+ 'din' => 'Dinka',
+ 'div' => 'Divehi',
+ 'doi' => 'Dogri',
+ 'dgr' => 'Dogrib',
+ 'dra' => 'Dravidian (Other)',
+ 'dua' => 'Duala',
+ 'dut' => 'Dutch',
+ 'nld' => 'Dutch',
+ 'dum' => 'Dutch, Middle (ca. 1050-1350)',
+ 'dyu' => 'Dyula',
+ 'dzo' => 'Dzongkha',
+ 'efi' => 'Efik',
+ 'egy' => 'Egyptian (Ancient)',
+ 'eka' => 'Ekajuk',
+ 'elx' => 'Elamite',
+ 'eng' => 'English',
+ 'enm' => 'English, Middle (1100-1500)',
+ 'ang' => 'English, Old (ca.450-1100)',
+ 'myv' => 'Erzya',
+ 'epo' => 'Esperanto',
+ 'est' => 'Estonian',
+ 'ewe' => 'Ewe',
+ 'ewo' => 'Ewondo',
+ 'fan' => 'Fang',
+ 'fat' => 'Fanti',
+ 'fao' => 'Faroese',
+ 'fij' => 'Fijian',
+ 'fil' => 'Filipino; Pilipino',
+ 'fin' => 'Finnish',
+ 'fiu' => 'Finno-Ugrian (Other)',
+ 'fon' => 'Fon',
+ 'fre' => 'French',
+ 'fra' => 'French',
+ 'frm' => 'French, Middle (ca.1400-1600)',
+ 'fro' => 'French, Old (842-ca.1400)',
+ 'frs' => 'Frisian, Eastern',
+ 'fry' => 'Frisian, Western',
+ 'fur' => 'Friulian',
+ 'ful' => 'Fulah',
+ 'gaa' => 'Ga',
+ 'gla' => 'Gaelic',
+ 'glg' => 'Gallegan',
+ 'lug' => 'Ganda',
+ 'gay' => 'Gayo',
+ 'gba' => 'Gbaya',
+ 'gez' => 'Geez',
+ 'geo' => 'Georgian',
+ 'kat' => 'Georgian',
+ 'ger' => 'German',
+ 'deu' => 'German',
+ 'nds' => 'German, Low',
+ 'gmh' => 'German, Middle High (ca.1050-1500)',
+ 'goh' => 'German, Old High (ca.750-1050)',
+ 'gem' => 'Germanic (Other)',
+ 'kik' => 'Gikuyu',
+ 'gil' => 'Gilbertese',
+ 'gon' => 'Gondi',
+ 'gor' => 'Gorontalo',
+ 'got' => 'Gothic',
+ 'grb' => 'Grebo',
+ 'grc' => 'Greek, Ancient (to 1453)',
+ 'gre' => 'Greek, Modern (1453-)',
+ 'ell' => 'Greek, Modern (1453-)',
+ 'kal' => 'Greenlandic; Kalaallisut',
+ 'grn' => 'Guarani',
+ 'guj' => 'Gujarati',
+ 'gwi' => 'Gwich\'in',
+ 'hai' => 'Haida',
+ 'hat' => 'Haitian',
+ 'hau' => 'Hausa',
+ 'haw' => 'Hawaiian',
+ 'heb' => 'Hebrew',
+ 'her' => 'Herero',
+ 'hil' => 'Hiligaynon',
+ 'him' => 'Himachali',
+ 'hin' => 'Hindi',
+ 'hmo' => 'Hiri Motu',
+ 'hit' => 'Hittite',
+ 'hmn' => 'Hmong',
+ 'hun' => 'Hungarian',
+ 'hup' => 'Hupa',
+ 'iba' => 'Iban',
+ 'ice' => 'Icelandic',
+ 'isl' => 'Icelandic',
+ 'ido' => 'Ido',
+ 'ibo' => 'Igbo',
+ 'ijo' => 'Ijo',
+ 'ilo' => 'Iloko',
+ 'smn' => 'Inari Sami',
+ 'inc' => 'Indic (Other)',
+ 'ine' => 'Indo-European (Other)',
+ 'ind' => 'Indonesian',
+ 'inh' => 'Ingush',
+ 'ina' => 'Interlingua (International Auxiliary Language Association)',
+ 'ile' => 'Interlingue',
+ 'iku' => 'Inuktitut',
+ 'ipk' => 'Inupiaq',
+ 'ira' => 'Iranian (Other)',
+ 'gle' => 'Irish',
+ 'mga' => 'Irish, Middle (900-1200)',
+ 'sga' => 'Irish, Old (to 900)',
+ 'iro' => 'Iroquoian languages',
+ 'ita' => 'Italian',
+ 'jpn' => 'Japanese',
+ 'jav' => 'Javanese',
+ 'jrb' => 'Judeo-Arabic',
+ 'jpr' => 'Judeo-Persian',
+ 'kbd' => 'Kabardian',
+ 'kab' => 'Kabyle',
+ 'kac' => 'Kachin',
+ 'kal' => 'Kalaallisut',
+ 'xal' => 'Kalmyk',
+ 'kam' => 'Kamba',
+ 'kan' => 'Kannada',
+ 'kau' => 'Kanuri',
+ 'krc' => 'Karachay-Balkar',
+ 'kaa' => 'Kara-Kalpak',
+ 'krl' => 'Karelian',
+ 'kar' => 'Karen',
+ 'kas' => 'Kashmiri',
+ 'csb' => 'Kashubian',
+ 'kaw' => 'Kawi',
+ 'kaz' => 'Kazakh',
+ 'kha' => 'Khasi',
+ 'khm' => 'Khmer',
+ 'khi' => 'Khoisan (Other)',
+ 'kho' => 'Khotanese',
+ 'kik' => 'Kikuyu',
+ 'kmb' => 'Kimbundu',
+ 'kin' => 'Kinyarwanda',
+ 'kir' => 'Kirghiz',
+ 'tlh' => 'Klingon; tlhIngan-Hol',
+ 'kom' => 'Komi',
+ 'kon' => 'Kongo',
+ 'kok' => 'Konkani',
+ 'kor' => 'Korean',
+ 'kos' => 'Kosraean',
+ 'kpe' => 'Kpelle',
+ 'kro' => 'Kru',
+ 'kua' => 'Kuanyama',
+ 'kum' => 'Kumyk',
+ 'kur' => 'Kurdish',
+ 'kru' => 'Kurukh',
+ 'kut' => 'Kutenai',
+ 'kua' => 'Kwanyama',
+ 'lad' => 'Ladino',
+ 'lah' => 'Lahnda',
+ 'lam' => 'Lamba',
+ 'lao' => 'Lao',
+ 'lat' => 'Latin',
+ 'lav' => 'Latvian',
+ 'ltz' => 'Letzeburgesch',
+ 'lez' => 'Lezghian',
+ 'lim' => 'Limburgan',
+ 'lin' => 'Lingala',
+ 'lit' => 'Lithuanian',
+ 'jbo' => 'Lojban',
+ 'nds' => 'Low German',
+ 'dsb' => 'Lower Sorbian',
+ 'loz' => 'Lozi',
+ 'lub' => 'Luba-Katanga',
+ 'lua' => 'Luba-Lulua',
+ 'lui' => 'Luiseno',
+ 'smj' => 'Lule Sami',
+ 'lun' => 'Lunda',
+ 'luo' => 'Luo (Kenya and Tanzania)',
+ 'lus' => 'Lushai',
+ 'ltz' => 'Luxembourgish',
+ 'mac' => 'Macedonian',
+ 'mkd' => 'Macedonian',
+ 'mad' => 'Madurese',
+ 'mag' => 'Magahi',
+ 'mai' => 'Maithili',
+ 'mak' => 'Makasar',
+ 'mlg' => 'Malagasy',
+ 'may' => 'Malay',
+ 'msa' => 'Malay',
+ 'mal' => 'Malayalam',
+ 'mlt' => 'Maltese',
+ 'mnc' => 'Manchu',
+ 'mdr' => 'Mandar',
+ 'man' => 'Mandingo',
+ 'mni' => 'Manipuri',
+ 'mno' => 'Manobo languages',
+ 'glv' => 'Manx',
+ 'mao' => 'Maori',
+ 'mri' => 'Maori',
+ 'mar' => 'Marathi',
+ 'chm' => 'Mari',
+ 'mah' => 'Marshallese',
+ 'mwr' => 'Marwari',
+ 'mas' => 'Masai',
+ 'myn' => 'Mayan languages',
+ 'men' => 'Mende',
+ 'mic' => 'Micmac',
+ 'min' => 'Minangkabau',
+ 'mwl' => 'Mirandese',
+ 'mis' => 'Miscellaneous languages',
+ 'moh' => 'Mohawk',
+ 'mdf' => 'Moksha',
+ 'mol' => 'Moldavian',
+ 'mkh' => 'Mon-Khmer (Other)',
+ 'lol' => 'Mongo',
+ 'mon' => 'Mongolian',
+ 'mos' => 'Mossi',
+ 'mul' => 'Multiple languages',
+ 'mun' => 'Munda languages',
+ 'nah' => 'Nahuatl',
+ 'nau' => 'Nauru',
+ 'nav' => 'Navaho; Navajo',
+ 'nde' => 'Ndebele, North',
+ 'nbl' => 'Ndebele, South',
+ 'ndo' => 'Ndonga',
+ 'nap' => 'Neapolitan',
+ 'nep' => 'Nepali',
+ 'new' => 'Newari',
+ 'nia' => 'Nias',
+ 'nic' => 'Niger-Kordofanian (Other)',
+ 'ssa' => 'Nilo-Saharan (Other)',
+ 'niu' => 'Niuean',
+ 'nog' => 'Nogai',
+ 'non' => 'Norse, Old',
+ 'nai' => 'North American Indian (Other)',
+ 'frr' => 'Northern Frisian',
+ 'sme' => 'Northern Sami',
+ 'nso' => 'Northern Sotho; Pedi; Sepedi',
+ 'nde' => 'North Ndebele',
+ 'nor' => 'Norwegian',
+ 'nob' => 'Norwegian Bokmal',
+ 'nno' => 'Norwegian Nynorsk',
+ 'nub' => 'Nubian languages',
+ 'nym' => 'Nyamwezi',
+ 'nya' => 'Nyanja',
+ 'nyn' => 'Nyankole',
+ 'nno' => 'Nynorsk, Norwegian',
+ 'nyo' => 'Nyoro',
+ 'nzi' => 'Nzima',
+ 'oci' => 'Occitan (post 1500)',
+ 'oji' => 'Ojibwa',
+ 'ori' => 'Oriya',
+ 'orm' => 'Oromo',
+ 'osa' => 'Osage',
+ 'oss' => 'Ossetian; Ossetic',
+ 'oto' => 'Otomian languages',
+ 'pal' => 'Pahlavi',
+ 'pau' => 'Palauan',
+ 'pli' => 'Pali',
+ 'pam' => 'Pampanga',
+ 'pag' => 'Pangasinan',
+ 'pan' => 'Panjabi',
+ 'pap' => 'Papiamento',
+ 'paa' => 'Papuan (Other)',
+ 'per' => 'Persian',
+ 'fas' => 'Persian',
+ 'peo' => 'Persian, Old (ca.600-400)',
+ 'phi' => 'Philippine (Other)',
+ 'phn' => 'Phoenician',
+ 'pon' => 'Pohnpeian',
+ 'pol' => 'Polish',
+ 'por' => 'Portuguese',
+ 'pra' => 'Prakrit languages',
+ 'oci' => 'Provencal',
+ 'pro' => 'Provencal, Old (to 1500)',
+ 'pan' => 'Punjabi',
+ 'pus' => 'Pushto',
+ 'que' => 'Quechua',
+ 'roh' => 'Raeto-Romance',
+ 'raj' => 'Rajasthani',
+ 'rap' => 'Rapanui',
+ 'rar' => 'Rarotongan',
+ 'qaa' => 'Reserved for local use',
+ 'qtz' => 'Reserved for local use',
+ 'roa' => 'Romance (Other)',
+ 'rum' => 'Romanian',
+ 'ron' => 'Romanian',
+ 'rom' => 'Romany',
+ 'run' => 'Rundi',
+ 'rus' => 'Russian',
+ 'sal' => 'Salishan languages',
+ 'sam' => 'Samaritan Aramaic',
+ 'smi' => 'Sami languages (Other)',
+ 'smo' => 'Samoan',
+ 'sad' => 'Sandawe',
+ 'sag' => 'Sango',
+ 'san' => 'Sanskrit',
+ 'sat' => 'Santali',
+ 'srd' => 'Sardinian',
+ 'sas' => 'Sasak',
+ 'nds' => 'Saxon, Low',
+ 'sco' => 'Scots',
+ 'gla' => 'Scottish Gaelic',
+ 'sel' => 'Selkup',
+ 'sem' => 'Semitic (Other)',
+ 'nso' => 'Sepedi; Northern Sotho; Pedi',
+ 'scc' => 'Serbian',
+ 'srp' => 'Serbian',
+ 'srr' => 'Serer',
+ 'shn' => 'Shan',
+ 'sna' => 'Shona',
+ 'iii' => 'Sichuan Yi',
+ 'scn' => 'Sicilian',
+ 'sid' => 'Sidamo',
+ 'sgn' => 'Sign languages',
+ 'bla' => 'Siksika',
+ 'snd' => 'Sindhi',
+ 'sin' => 'Sinhalese',
+ 'sit' => 'Sino-Tibetan (Other)',
+ 'sio' => 'Siouan languages',
+ 'sms' => 'Skolt Sami',
+ 'den' => 'Slave (Athapascan)',
+ 'sla' => 'Slavic (Other)',
+ 'slo' => 'Slovak',
+ 'slk' => 'Slovak',
+ 'slv' => 'Slovenian',
+ 'sog' => 'Sogdian',
+ 'som' => 'Somali',
+ 'son' => 'Songhai',
+ 'snk' => 'Soninke',
+ 'wen' => 'Sorbian languages',
+ 'nso' => 'Sotho, Northern',
+ 'sot' => 'Sotho, Southern',
+ 'sai' => 'South American Indian (Other)',
+ 'alt' => 'Southern Altai',
+ 'sma' => 'Southern Sami',
+ 'nbl' => 'South Ndebele',
+ 'spa' => 'Spanish',
+ 'srn' => 'Sranan Tongo',
+ 'suk' => 'Sukuma',
+ 'sux' => 'Sumerian',
+ 'sun' => 'Sundanese',
+ 'sus' => 'Susu',
+ 'swa' => 'Swahili',
+ 'ssw' => 'Swati',
+ 'swe' => 'Swedish',
+ 'gsw' => 'Swiss German; Alemanic',
+ 'syr' => 'Syriac',
+ 'tgl' => 'Tagalog',
+ 'tah' => 'Tahitian',
+ 'tai' => 'Tai (Other)',
+ 'tgk' => 'Tajik',
+ 'tmh' => 'Tamashek',
+ 'tam' => 'Tamil',
+ 'tat' => 'Tatar',
+ 'tel' => 'Telugu',
+ 'ter' => 'Tereno',
+ 'tet' => 'Tetum',
+ 'tha' => 'Thai',
+ 'tib' => 'Tibetan',
+ 'bod' => 'Tibetan',
+ 'tig' => 'Tigre',
+ 'tir' => 'Tigrinya',
+ 'tem' => 'Timne',
+ 'tiv' => 'Tiv',
+ 'tlh' => 'tlhIngan-Hol; Klingon',
+ 'tli' => 'Tlingit',
+ 'tpi' => 'Tok Pisin',
+ 'tkl' => 'Tokelau',
+ 'tog' => 'Tonga (Nyasa)',
+ 'ton' => 'Tonga (Tonga Islands)',
+ 'tsi' => 'Tsimshian',
+ 'tso' => 'Tsonga',
+ 'tsn' => 'Tswana',
+ 'tum' => 'Tumbuka',
+ 'tup' => 'Tupi languages',
+ 'tur' => 'Turkish',
+ 'ota' => 'Turkish, Ottoman (1500-1928)',
+ 'tuk' => 'Turkmen',
+ 'tvl' => 'Tuvalu',
+ 'tyv' => 'Tuvinian',
+ 'twi' => 'Twi',
+ 'udm' => 'Udmurt',
+ 'uga' => 'Ugaritic',
+ 'uig' => 'Uighur',
+ 'ukr' => 'Ukrainian',
+ 'umb' => 'Umbundu',
+ 'und' => 'Undetermined',
+ 'hsb' => 'Upper Sorbian',
+ 'urd' => 'Urdu',
+ 'uzb' => 'Uzbek',
+ 'vai' => 'Vai',
+ 'cat' => 'Valencian',
+ 'ven' => 'Venda',
+ 'vie' => 'Vietnamese',
+ 'vol' => 'Volapuk',
+ 'vot' => 'Votic',
+ 'wak' => 'Wakashan languages',
+ 'wal' => 'Walamo',
+ 'wln' => 'Walloon',
+ 'war' => 'Waray',
+ 'was' => 'Washo',
+ 'wel' => 'Welsh',
+ 'cym' => 'Welsh',
+ 'fry' => 'Wester Frisian',
+ 'wol' => 'Wolof',
+ 'xho' => 'Xhosa',
+ 'sah' => 'Yakut',
+ 'yao' => 'Yao',
+ 'yap' => 'Yapese',
+ 'yid' => 'Yiddish',
+ 'yor' => 'Yoruba',
+ 'ypk' => 'Yupik languages',
+ 'znd' => 'Zande',
+ 'zap' => 'Zapotec',
+ 'zen' => 'Zenaga',
+ 'zha' => 'Zhuang',
+ 'zul' => 'Zulu',
+ 'zun' => 'Zuni'
+}
+
+def is_valid_lang_code(value)
+ if value.include? '-'
+ lang, sublang = value.split('-', 2)
+ else
+ lang = value
+ end
+ !!ISO_LANG[lang.downcase]
+end
\ No newline at end of file
diff --git a/vendor/plugins/HTML5lib/lib/html5/filters/rfc2046.rb b/vendor/plugins/HTML5lib/lib/html5/filters/rfc2046.rb
new file mode 100755
index 00000000..593baf42
--- /dev/null
+++ b/vendor/plugins/HTML5lib/lib/html5/filters/rfc2046.rb
@@ -0,0 +1,30 @@
+# adapted from feedvalidator, original copyright license is
+#
+# Copyright (c) 2002-2006, Sam Ruby, Mark Pilgrim, Joseph Walton, and Phil Ringnalda
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+
+# mime_re = Regexp.new('[^\s()<>,;:\\"/[\]?=]+/[^\s()<>,;:\\"/[\]?=]+(\s*;\s*[^\s()<>,;:\\"/[\]?=]+=("(\\"|[^"])*"|[^\s()<>,;:\\"/[\]?=]+))*$')
+
+def is_valid_mime_type(value) # Stub: reports every +value+ as a valid MIME type.
+ # !!mime_re.match(value)  <- intended regex check, disabled (mime_re itself is commented out above)
+ true
+end
+
diff --git a/vendor/plugins/HTML5lib/lib/html5/filters/rfc3987.rb b/vendor/plugins/HTML5lib/lib/html5/filters/rfc3987.rb
new file mode 100755
index 00000000..bf730e06
--- /dev/null
+++ b/vendor/plugins/HTML5lib/lib/html5/filters/rfc3987.rb
@@ -0,0 +1,89 @@
+# adapted from feedvalidator, original copyright license is
+#
+# Copyright (c) 2002-2006, Sam Ruby, Mark Pilgrim, Joseph Walton, and Phil Ringnalda
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+iana_schemes = [ # http://www.iana.org/assignments/uri-schemes.html
+  "ftp", "http", "gopher", "mailto", "news", "nntp", "telnet", "wais",
+  "file", "prospero", "z39.50s", "z39.50r", "cid", "mid", "vemmi",
+  "service", "imap", "nfs", "acap", "rtsp", "tip", "pop", "data", "dav",
+  "opaquelocktoken", "sip", "sips", "tel", "fax", "modem", "ldap",
+  "https", "soap.beep", "soap.beeps", "xmlrpc.beep", "xmlrpc.beeps",
+  "urn", "go", "h323", "ipp", "tftp", "mupdate", "pres", "im", "mtqp",
+  "iris.beep", "dict", "snmp", "crid", "tag", "dns", "info"
+]
+ALLOWED_SCHEMES = iana_schemes + ['javascript'] # IANA-registered schemes plus javascript
+# URN and TAG are regexp literals anchored with \A/\z: in a double-quoted string "\d" collapses to "d" and "\.\-_" to the range ".-_".
+RFC2396 = Regexp.new("^([a-zA-Z][0-9a-zA-Z+\\-\\.]*:)?/{0,2}[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]*$", Regexp::MULTILINE)
+rfc2396_full = Regexp.new("[a-zA-Z][0-9a-zA-Z+\\-\\.]*:(//)?[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]+$") # NOTE: a file-level local, not visible inside method bodies
+URN = %r{\A[Uu][Rr][Nn]:[a-zA-Z0-9][a-zA-Z0-9-]{1,31}:([a-zA-Z0-9()+,.:=@;$_!*'\-]|%[0-9A-Fa-f]{2})+\z}
+TAG = %r{\Atag:([a-z0-9\-._]+?@)?[a-z0-9.\-]+?,\d{4}(-\d{2}(-\d{2})?)?:[0-9a-zA-Z;/?:@&=+$.\-_!~*'()%,]*(#[0-9a-zA-Z;/?:@&=+$.\-_!~*'()%,]*)?\z}
+
+def is_valid_uri(value, uri_pattern = RFC2396) # returns [ok, error_code]; error_code is "" when ok
+  # to_s guards against nil: split(':') yields [] for "" and for colon-only values such as ":".
+  scheme = value.split(':').first.to_s.downcase
+  if scheme == 'tag'
+    if !TAG.match(value)
+      return false, "invalid-tag-uri"
+    end
+  elsif scheme == "urn"
+    if !URN.match(value)
+      return false, "invalid-urn"
+    end
+  elsif uri_pattern.match(value).to_a.reject{|i| i == ''}.compact.length == 0 || uri_pattern.match(value)[0] != value
+    urichars = Regexp.new("^[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]$", Regexp::MULTILINE)
+    if value.length > 0 # NOTE(review): a non-empty value that passes the byte check falls through and is reported valid - confirm intended
+      value.each_byte do |b| # only ASCII bytes (< 128) are checked against urichars
+        if b < 128 && !urichars.match([b].pack('c*'))
+          return false, "invalid-uri-char"
+        end
+      end
+    else
+      begin
+        if uri_pattern.match(value.encode('idna'))
+          return false, "uri-not-iri"
+        end
+      rescue # best-effort: any failure of the 'idna' conversion falls through to "invalid-uri"
+      end
+      return false, "invalid-uri"
+    end
+  elsif ['http','ftp'].include?(scheme)
+    if !value.match(%r{^\w+://[^/].*})
+      return false, "invalid-http-or-ftp-uri"
+    end
+  elsif value.index(':') && scheme.match(/^[a-z]+$/) && !ALLOWED_SCHEMES.include?(scheme)
+    return false, "invalid-scheme"
+  end
+  return true, ""
+end
+
+def is_valid_iri(value) # returns the [ok, error_code] pair produced by is_valid_uri
+  candidate = value
+  begin
+    # Best-effort: if the 'idna' conversion raises, the value is validated unchanged.
+    candidate = value.encode('idna') unless value.length == 0
+  rescue
+  end
+  is_valid_uri(candidate)
+end
+
+def is_valid_fully_qualified_uri(value) # the file-level local rfc2396_full is invisible inside a def (NameError), so the pattern is built here
+  is_valid_uri(value, Regexp.new("[a-zA-Z][0-9a-zA-Z+\\-\\.]*:(//)?[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]+$"))
+end
diff --git a/vendor/plugins/HTML5lib/lib/html5/filters/validator.rb b/vendor/plugins/HTML5lib/lib/html5/filters/validator.rb
new file mode 100755
index 00000000..99e67baa
--- /dev/null
+++ b/vendor/plugins/HTML5lib/lib/html5/filters/validator.rb
@@ -0,0 +1,830 @@
+# HTML 5 conformance checker
+#
+# Warning: this module is experimental, incomplete, and subject to removal at any time.
+#
+# Usage:
+# >>> from html5lib.html5parser import HTMLParser
+# >>> from html5lib.filters.validator import HTMLConformanceChecker
+# >>> p = HTMLParser(tokenizer=HTMLConformanceChecker)
+# >>> p.parse('\n')
+# < nil>
+# >>> p.errors
+# [((2, 14), 'unknown-attribute', {'attributeName' => u'foo', 'tagName' => u'html'})]
+
+require 'html5/constants'
+require 'html5/filters/base'
+require 'html5/filters/iso639codes'
+require 'html5/filters/rfc3987'
+require 'html5/filters/rfc2046'
+
+def _(str); str; end # returns its argument unchanged; presumably a gettext-style marker for translatable messages (confirm)
+
+class String
+  # Lifted from Rails: "FooBar::BazQux" becomes "foo_bar/baz_qux",
+  # and every "-" is replaced by "_".
+  def underscore
+    snake = gsub(/::/, '/')
+    snake = snake.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
+    snake = snake.gsub(/([a-z\d])([A-Z])/, '\1_\2')
+    snake.tr("-", "_").downcase
+  end
+end
+
+HTML5::E.update({
+ "unknown-start-tag" =>
+ _("Unknown start tag <%(tagName)>."),
+ "unknown-attribute" =>
+ _("Unknown '%(attributeName)' attribute on <%(tagName)>."),
+ "missing-required-attribute" =>
+ _("The '%(attributeName)' attribute is required on <%(tagName)>."),
+ "unknown-input-type" =>
+ _("Illegal value for attribute on ."),
+ "attribute-not-allowed-on-this-input-type" =>
+ _("The '%(attributeName)' attribute is not allowed on ."),
+ "deprecated-attribute" =>
+ _("This attribute is deprecated: '%(attributeName)' attribute on <%(tagName)>."),
+ "duplicate-value-in-token-list" =>
+ _("Duplicate value in token list: '%(attributeValue)' in '%(attributeName)' attribute on <%(tagName)>."),
+ "invalid-attribute-value" =>
+ _("Invalid attribute value: '%(attributeName)' attribute on <%(tagName)>."),
+ "space-in-id" =>
+ _("Whitespace is not allowed here: '%(attributeName)' attribute on <%(tagName)>."),
+ "duplicate-id" =>
+ _("This ID was already defined earlier: 'id' attribute on <%(tagName)>."),
+ "attribute-value-can-not-be-blank" =>
+ _("This value can not be blank: '%(attributeName)' attribute on <%(tagName)>."),
+ "id-does-not-exist" =>
+ _("This value refers to a non-existent ID: '%(attributeName)' attribute on <%(tagName)>."),
+ "invalid-enumerated-value" =>
+ _("Value must be one of %(enumeratedValues): '%(attributeName)' attribute on <%tagName)>."),
+ "invalid-boolean-value" =>
+ _("Value must be one of %(enumeratedValues): '%(attributeName)' attribute on <%tagName)>."),
+ "contextmenu-must-point-to-menu" =>
+ _("The contextmenu attribute must point to an ID defined on a