module HTML5
# Exception class named EOF, scoped to this namespace.
# It derives directly from Exception rather than StandardError, so a bare
# `rescue` clause does not catch it; handlers have to name it explicitly.
# NOTE(review): presumably raised when the input is exhausted -- confirm at
# the raise sites.
class EOF < Exception
end
# Identity hook: hands back +str+ itself, unchanged.
# NOTE(review): the name follows the gettext `_()` convention, so this looks
# like a placeholder for message translation -- confirm before relying on it.
def self._(str)
  str
end
# Symbols naming the content-model flags, in declaration order.
CONTENT_MODEL_FLAGS = [:PCDATA, :RCDATA, :CDATA, :PLAINTEXT]
# Tag names grouped as "scoping" elements.
# NOTE(review): presumably the elements that bound "in scope" look-ups in the
# tree builder -- confirm against the callers.
SCOPING_ELEMENTS = %w[button caption html marquee object table td th]
# Tag names grouped as "formatting" elements (inline presentational markup
# such as <b>, <i>, <font>).
FORMATTING_ELEMENTS = %w[a b big em font i nobr s small strike strong tt u]
# Tag names grouped as "special" elements. The list is kept in alphabetical
# order; entries are laid out by initial letter for easier scanning.
SPECIAL_ELEMENTS = %w[
  address area
  base basefont bgsound blockquote body br
  center col colgroup
  dd dir div dl dt
  embed
  fieldset form frame frameset
  h1 h2 h3 h4 h5 h6 head hr
  iframe image img input isindex
  li link listing
  menu meta
  noembed noframes noscript
  ol optgroup option
  p param plaintext pre
  script select spacer style
  tbody textarea tfoot thead title tr
  ul
  wbr
]
# The six one-character whitespace strings: tab, line feed, vertical tab,
# form feed, space and carriage return (in that order).
SPACE_CHARACTERS = ["\t", "\n", "\x0B", "\x0C", "\x20", "\r"]
# Table-structure tag names collected under TABLE_INSERT_MODE_ELEMENTS.
# NOTE(review): presumably the current-node names that switch the tree builder
# into its table insertion mode -- confirm against the callers.
TABLE_INSERT_MODE_ELEMENTS = %w[table tbody tfoot thead tr]
# Character-class constants. Note the differing container types:
# the three ASCII_* constants are Strings, DIGITS is a Range of
# one-character strings, and HEX_DIGITS is an Array of one-character strings
# (digits first, then a-f, then A-F).
ASCII_LOWERCASE = Array('a'..'z').join('')
ASCII_UPPERCASE = Array('A'..'Z').join('')
ASCII_LETTERS = "#{ASCII_LOWERCASE}#{ASCII_UPPERCASE}"
DIGITS = ('0'..'9')
HEX_DIGITS = [DIGITS, ('a'..'f'), ('A'..'F')].map { |range| range.to_a }.flatten
# Heading elements need to be ordered: h1 through h6, ascending.
HEADING_ELEMENTS = (1..6).map { |level| "h#{level}" }
# Tag names of void elements (elements written without an end tag).
# XXX What about event-source and command?
VOID_ELEMENTS = %w[base link meta hr br img embed param area col input]
# Tag names filed under the CDATA label.
# NOTE(review): the HTML specification describes <title> and <textarea> as
# RCDATA (escapable) content, so this name looks swapped with its RCDATA
# sibling -- verify how callers use it before renaming or "correcting" it.
CDATA_ELEMENTS = ['title', 'textarea']
# Tag names filed under the RCDATA label.
# NOTE(review): the HTML specification treats these (style, script, xmp, ...)
# as raw-text / CDATA content, so this name looks swapped with its CDATA
# sibling -- verify how callers use it before renaming or "correcting" it.
RCDATA_ELEMENTS = %w[style script xmp iframe noembed noframes noscript]
# Boolean attribute names, keyed by the tag name they belong to.
# The :global symbol key holds attributes that are not tied to one tag name;
# every other key is a tag-name String.
BOOLEAN_ATTRIBUTES = {
  :global    => ['irrelevant'],
  'style'    => ['scoped'],
  'img'      => ['ismap'],
  'audio'    => ['autoplay', 'controls'],
  'video'    => ['autoplay', 'controls'],
  'script'   => ['defer', 'async'],
  'details'  => ['open'],
  'datagrid' => ['multiple', 'disabled'],
  'command'  => ['hidden', 'disabled', 'checked', 'default'],
  'menu'     => ['autosubmit'],
  'fieldset' => ['disabled', 'readonly'],
  'option'   => ['disabled', 'readonly', 'selected'],
  'optgroup' => ['disabled', 'readonly'],
  'button'   => ['disabled', 'autofocus'],
  'input'    => ['disabled', 'readonly', 'required', 'autofocus', 'checked', 'ismap'],
  'select'   => ['disabled', 'readonly', 'autofocus', 'multiple'],
  'output'   => ['disabled', 'readonly']
}
# ENTITIES_WINDOWS1252 has to be _ordered_ and needs to have an index:
# element i is the Unicode code point for windows-1252 byte 0x80 + i.
# Bytes with no assignment map to 0xFFFD (REPLACEMENT CHARACTER).
ENTITIES_WINDOWS1252 = [
  0x20AC, # 0x80 EURO SIGN
  0xFFFD, # 0x81 UNDEFINED
  0x201A, # 0x82 SINGLE LOW-9 QUOTATION MARK
  0x0192, # 0x83 LATIN SMALL LETTER F WITH HOOK
  0x201E, # 0x84 DOUBLE LOW-9 QUOTATION MARK
  0x2026, # 0x85 HORIZONTAL ELLIPSIS
  0x2020, # 0x86 DAGGER
  0x2021, # 0x87 DOUBLE DAGGER
  0x02C6, # 0x88 MODIFIER LETTER CIRCUMFLEX ACCENT
  0x2030, # 0x89 PER MILLE SIGN
  0x0160, # 0x8A LATIN CAPITAL LETTER S WITH CARON
  0x2039, # 0x8B SINGLE LEFT-POINTING ANGLE QUOTATION MARK
  0x0152, # 0x8C LATIN CAPITAL LIGATURE OE
  0xFFFD, # 0x8D UNDEFINED
  0x017D, # 0x8E LATIN CAPITAL LETTER Z WITH CARON
  0xFFFD, # 0x8F UNDEFINED
  0xFFFD, # 0x90 UNDEFINED
  0x2018, # 0x91 LEFT SINGLE QUOTATION MARK
  0x2019, # 0x92 RIGHT SINGLE QUOTATION MARK
  0x201C, # 0x93 LEFT DOUBLE QUOTATION MARK
  0x201D, # 0x94 RIGHT DOUBLE QUOTATION MARK
  0x2022, # 0x95 BULLET
  0x2013, # 0x96 EN DASH
  0x2014, # 0x97 EM DASH
  0x02DC, # 0x98 SMALL TILDE
  0x2122, # 0x99 TRADE MARK SIGN
  0x0161, # 0x9A LATIN SMALL LETTER S WITH CARON
  0x203A, # 0x9B SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
  0x0153, # 0x9C LATIN SMALL LIGATURE OE
  0xFFFD, # 0x9D UNDEFINED
  0x017E, # 0x9E LATIN SMALL LETTER Z WITH CARON
  0x0178  # 0x9F LATIN CAPITAL LETTER Y WITH DIAERESIS
]
# ENTITIES maps a named character reference (the text after '&') to its
# replacement string. Non-ASCII replacements are written as escaped UTF-8
# byte sequences (the generator below calls value.encode('utf-8')); ASCII
# replacements are plain single-quoted literals.
#
# Some names are listed twice, with and without the trailing ';'
# (e.g. 'amp' and 'amp;'); the rest exist only in the ';'-terminated form.
# Keys are case-sensitive ('Dagger;' and 'dagger;' map to different values).
#
# Do not hand-edit the byte escapes casually: the table is generated.
# ENTITIES was generated from Python using the following code:
#
# import constants
# entities = constants.entities.items()
# entities.sort()
# list = [ ' '.join([repr(entity), '=>', ord(value)<128 and
# repr(str(value)) or repr(value.encode('utf-8')).replace("'",'"')])
# for entity, value in entities]
# print ' ENTITIES = {\n ' + ',\n '.join(list) + '\n }'
ENTITIES = {
'AElig' => "\xc3\x86",
'AElig;' => "\xc3\x86",
'AMP' => '&',
'AMP;' => '&',
'Aacute' => "\xc3\x81",
'Aacute;' => "\xc3\x81",
'Acirc' => "\xc3\x82",
'Acirc;' => "\xc3\x82",
'Agrave' => "\xc3\x80",
'Agrave;' => "\xc3\x80",
'Alpha;' => "\xce\x91",
'Aring' => "\xc3\x85",
'Aring;' => "\xc3\x85",
'Atilde' => "\xc3\x83",
'Atilde;' => "\xc3\x83",
'Auml' => "\xc3\x84",
'Auml;' => "\xc3\x84",
'Beta;' => "\xce\x92",
'COPY' => "\xc2\xa9",
'COPY;' => "\xc2\xa9",
'Ccedil' => "\xc3\x87",
'Ccedil;' => "\xc3\x87",
'Chi;' => "\xce\xa7",
'Dagger;' => "\xe2\x80\xa1",
'Delta;' => "\xce\x94",
'ETH' => "\xc3\x90",
'ETH;' => "\xc3\x90",
'Eacute' => "\xc3\x89",
'Eacute;' => "\xc3\x89",
'Ecirc' => "\xc3\x8a",
'Ecirc;' => "\xc3\x8a",
'Egrave' => "\xc3\x88",
'Egrave;' => "\xc3\x88",
'Epsilon;' => "\xce\x95",
'Eta;' => "\xce\x97",
'Euml' => "\xc3\x8b",
'Euml;' => "\xc3\x8b",
'GT' => '>',
'GT;' => '>',
'Gamma;' => "\xce\x93",
'Iacute' => "\xc3\x8d",
'Iacute;' => "\xc3\x8d",
'Icirc' => "\xc3\x8e",
'Icirc;' => "\xc3\x8e",
'Igrave' => "\xc3\x8c",
'Igrave;' => "\xc3\x8c",
'Iota;' => "\xce\x99",
'Iuml' => "\xc3\x8f",
'Iuml;' => "\xc3\x8f",
'Kappa;' => "\xce\x9a",
'LT' => '<',
'LT;' => '<',
'Lambda;' => "\xce\x9b",
'Mu;' => "\xce\x9c",
'Ntilde' => "\xc3\x91",
'Ntilde;' => "\xc3\x91",
'Nu;' => "\xce\x9d",
'OElig;' => "\xc5\x92",
'Oacute' => "\xc3\x93",
'Oacute;' => "\xc3\x93",
'Ocirc' => "\xc3\x94",
'Ocirc;' => "\xc3\x94",
'Ograve' => "\xc3\x92",
'Ograve;' => "\xc3\x92",
'Omega;' => "\xce\xa9",
'Omicron;' => "\xce\x9f",
'Oslash' => "\xc3\x98",
'Oslash;' => "\xc3\x98",
'Otilde' => "\xc3\x95",
'Otilde;' => "\xc3\x95",
'Ouml' => "\xc3\x96",
'Ouml;' => "\xc3\x96",
'Phi;' => "\xce\xa6",
'Pi;' => "\xce\xa0",
'Prime;' => "\xe2\x80\xb3",
'Psi;' => "\xce\xa8",
'QUOT' => '"',
'QUOT;' => '"',
'REG' => "\xc2\xae",
'REG;' => "\xc2\xae",
'Rho;' => "\xce\xa1",
'Scaron;' => "\xc5\xa0",
'Sigma;' => "\xce\xa3",
'THORN' => "\xc3\x9e",
'THORN;' => "\xc3\x9e",
'TRADE;' => "\xe2\x84\xa2",
'Tau;' => "\xce\xa4",
'Theta;' => "\xce\x98",
'Uacute' => "\xc3\x9a",
'Uacute;' => "\xc3\x9a",
'Ucirc' => "\xc3\x9b",
'Ucirc;' => "\xc3\x9b",
'Ugrave' => "\xc3\x99",
'Ugrave;' => "\xc3\x99",
'Upsilon;' => "\xce\xa5",
'Uuml' => "\xc3\x9c",
'Uuml;' => "\xc3\x9c",
'Xi;' => "\xce\x9e",
'Yacute' => "\xc3\x9d",
'Yacute;' => "\xc3\x9d",
'Yuml;' => "\xc5\xb8",
'Zeta;' => "\xce\x96",
'aacute' => "\xc3\xa1",
'aacute;' => "\xc3\xa1",
'acirc' => "\xc3\xa2",
'acirc;' => "\xc3\xa2",
'acute' => "\xc2\xb4",
'acute;' => "\xc2\xb4",
'aelig' => "\xc3\xa6",
'aelig;' => "\xc3\xa6",
'agrave' => "\xc3\xa0",
'agrave;' => "\xc3\xa0",
'alefsym;' => "\xe2\x84\xb5",
'alpha;' => "\xce\xb1",
'amp' => '&',
'amp;' => '&',
'and;' => "\xe2\x88\xa7",
'ang;' => "\xe2\x88\xa0",
'apos;' => "'",
'aring' => "\xc3\xa5",
'aring;' => "\xc3\xa5",
'asymp;' => "\xe2\x89\x88",
'atilde' => "\xc3\xa3",
'atilde;' => "\xc3\xa3",
'auml' => "\xc3\xa4",
'auml;' => "\xc3\xa4",
'bdquo;' => "\xe2\x80\x9e",
'beta;' => "\xce\xb2",
'brvbar' => "\xc2\xa6",
'brvbar;' => "\xc2\xa6",
'bull;' => "\xe2\x80\xa2",
'cap;' => "\xe2\x88\xa9",
'ccedil' => "\xc3\xa7",
'ccedil;' => "\xc3\xa7",
'cedil' => "\xc2\xb8",
'cedil;' => "\xc2\xb8",
'cent' => "\xc2\xa2",
'cent;' => "\xc2\xa2",
'chi;' => "\xcf\x87",
'circ;' => "\xcb\x86",
'clubs;' => "\xe2\x99\xa3",
'cong;' => "\xe2\x89\x85",
'copy' => "\xc2\xa9",
'copy;' => "\xc2\xa9",
'crarr;' => "\xe2\x86\xb5",
'cup;' => "\xe2\x88\xaa",
'curren' => "\xc2\xa4",
'curren;' => "\xc2\xa4",
'dArr;' => "\xe2\x87\x93",
'dagger;' => "\xe2\x80\xa0",
'darr;' => "\xe2\x86\x93",
'deg' => "\xc2\xb0",
'deg;' => "\xc2\xb0",
'delta;' => "\xce\xb4",
'diams;' => "\xe2\x99\xa6",
'divide' => "\xc3\xb7",
'divide;' => "\xc3\xb7",
'eacute' => "\xc3\xa9",
'eacute;' => "\xc3\xa9",
'ecirc' => "\xc3\xaa",
'ecirc;' => "\xc3\xaa",
'egrave' => "\xc3\xa8",
'egrave;' => "\xc3\xa8",
'empty;' => "\xe2\x88\x85",
'emsp;' => "\xe2\x80\x83",
'ensp;' => "\xe2\x80\x82",
'epsilon;' => "\xce\xb5",
'equiv;' => "\xe2\x89\xa1",
'eta;' => "\xce\xb7",
'eth' => "\xc3\xb0",
'eth;' => "\xc3\xb0",
'euml' => "\xc3\xab",
'euml;' => "\xc3\xab",
'euro;' => "\xe2\x82\xac",
'exist;' => "\xe2\x88\x83",
'fnof;' => "\xc6\x92",
'forall;' => "\xe2\x88\x80",
'frac12' => "\xc2\xbd",
'frac12;' => "\xc2\xbd",
'frac14' => "\xc2\xbc",
'frac14;' => "\xc2\xbc",
'frac34' => "\xc2\xbe",
'frac34;' => "\xc2\xbe",
'frasl;' => "\xe2\x81\x84",
'gamma;' => "\xce\xb3",
'ge;' => "\xe2\x89\xa5",
'gt' => '>',
'gt;' => '>',
'hArr;' => "\xe2\x87\x94",
'harr;' => "\xe2\x86\x94",
'hearts;' => "\xe2\x99\xa5",
'hellip;' => "\xe2\x80\xa6",
'iacute' => "\xc3\xad",
'iacute;' => "\xc3\xad",
'icirc' => "\xc3\xae",
'icirc;' => "\xc3\xae",
'iexcl' => "\xc2\xa1",
'iexcl;' => "\xc2\xa1",
'igrave' => "\xc3\xac",
'igrave;' => "\xc3\xac",
'image;' => "\xe2\x84\x91",
'infin;' => "\xe2\x88\x9e",
'int;' => "\xe2\x88\xab",
'iota;' => "\xce\xb9",
'iquest' => "\xc2\xbf",
'iquest;' => "\xc2\xbf",
'isin;' => "\xe2\x88\x88",
'iuml' => "\xc3\xaf",
'iuml;' => "\xc3\xaf",
'kappa;' => "\xce\xba",
'lArr;' => "\xe2\x87\x90",
'lambda;' => "\xce\xbb",
'lang;' => "\xe3\x80\x88",
'laquo' => "\xc2\xab",
'laquo;' => "\xc2\xab",
'larr;' => "\xe2\x86\x90",
'lceil;' => "\xe2\x8c\x88",
'ldquo;' => "\xe2\x80\x9c",
'le;' => "\xe2\x89\xa4",
'lfloor;' => "\xe2\x8c\x8a",
'lowast;' => "\xe2\x88\x97",
'loz;' => "\xe2\x97\x8a",
'lrm;' => "\xe2\x80\x8e",
'lsaquo;' => "\xe2\x80\xb9",
'lsquo;' => "\xe2\x80\x98",
'lt' => '<',
'lt;' => '<',
'macr' => "\xc2\xaf",
'macr;' => "\xc2\xaf",
'mdash;' => "\xe2\x80\x94",
'micro' => "\xc2\xb5",
'micro;' => "\xc2\xb5",
'middot' => "\xc2\xb7",
'middot;' => "\xc2\xb7",
'minus;' => "\xe2\x88\x92",
'mu;' => "\xce\xbc",
'nabla;' => "\xe2\x88\x87",
'nbsp' => "\xc2\xa0",
'nbsp;' => "\xc2\xa0",
'ndash;' => "\xe2\x80\x93",
'ne;' => "\xe2\x89\xa0",
'ni;' => "\xe2\x88\x8b",
'not' => "\xc2\xac",
'not;' => "\xc2\xac",
'notin;' => "\xe2\x88\x89",
'nsub;' => "\xe2\x8a\x84",
'ntilde' => "\xc3\xb1",
'ntilde;' => "\xc3\xb1",
'nu;' => "\xce\xbd",
'oacute' => "\xc3\xb3",
'oacute;' => "\xc3\xb3",
'ocirc' => "\xc3\xb4",
'ocirc;' => "\xc3\xb4",
'oelig;' => "\xc5\x93",
'ograve' => "\xc3\xb2",
'ograve;' => "\xc3\xb2",
'oline;' => "\xe2\x80\xbe",
'omega;' => "\xcf\x89",
'omicron;' => "\xce\xbf",
'oplus;' => "\xe2\x8a\x95",
'or;' => "\xe2\x88\xa8",
'ordf' => "\xc2\xaa",
'ordf;' => "\xc2\xaa",
'ordm' => "\xc2\xba",
'ordm;' => "\xc2\xba",
'oslash' => "\xc3\xb8",
'oslash;' => "\xc3\xb8",
'otilde' => "\xc3\xb5",
'otilde;' => "\xc3\xb5",
'otimes;' => "\xe2\x8a\x97",
'ouml' => "\xc3\xb6",
'ouml;' => "\xc3\xb6",
'para' => "\xc2\xb6",
'para;' => "\xc2\xb6",
'part;' => "\xe2\x88\x82",
'permil;' => "\xe2\x80\xb0",
'perp;' => "\xe2\x8a\xa5",
'phi;' => "\xcf\x86",
'pi;' => "\xcf\x80",
'piv;' => "\xcf\x96",
'plusmn' => "\xc2\xb1",
'plusmn;' => "\xc2\xb1",
'pound' => "\xc2\xa3",
'pound;' => "\xc2\xa3",
'prime;' => "\xe2\x80\xb2",
'prod;' => "\xe2\x88\x8f",
'prop;' => "\xe2\x88\x9d",
'psi;' => "\xcf\x88",
'quot' => '"',
'quot;' => '"',
'rArr;' => "\xe2\x87\x92",
'radic;' => "\xe2\x88\x9a",
'rang;' => "\xe3\x80\x89",
'raquo' => "\xc2\xbb",
'raquo;' => "\xc2\xbb",
'rarr;' => "\xe2\x86\x92",
'rceil;' => "\xe2\x8c\x89",
'rdquo;' => "\xe2\x80\x9d",
'real;' => "\xe2\x84\x9c",
'reg' => "\xc2\xae",
'reg;' => "\xc2\xae",
'rfloor;' => "\xe2\x8c\x8b",
'rho;' => "\xcf\x81",
'rlm;' => "\xe2\x80\x8f",
'rsaquo;' => "\xe2\x80\xba",
'rsquo;' => "\xe2\x80\x99",
'sbquo;' => "\xe2\x80\x9a",
'scaron;' => "\xc5\xa1",
'sdot;' => "\xe2\x8b\x85",
'sect' => "\xc2\xa7",
'sect;' => "\xc2\xa7",
'shy' => "\xc2\xad",
'shy;' => "\xc2\xad",
'sigma;' => "\xcf\x83",
'sigmaf;' => "\xcf\x82",
'sim;' => "\xe2\x88\xbc",
'spades;' => "\xe2\x99\xa0",
'sub;' => "\xe2\x8a\x82",
'sube;' => "\xe2\x8a\x86",
'sum;' => "\xe2\x88\x91",
'sup1' => "\xc2\xb9",
'sup1;' => "\xc2\xb9",
'sup2' => "\xc2\xb2",
'sup2;' => "\xc2\xb2",
'sup3' => "\xc2\xb3",
'sup3;' => "\xc2\xb3",
'sup;' => "\xe2\x8a\x83",
'supe;' => "\xe2\x8a\x87",
'szlig' => "\xc3\x9f",
'szlig;' => "\xc3\x9f",
'tau;' => "\xcf\x84",
'there4;' => "\xe2\x88\xb4",
'theta;' => "\xce\xb8",
'thetasym;' => "\xcf\x91",
'thinsp;' => "\xe2\x80\x89",
'thorn' => "\xc3\xbe",
'thorn;' => "\xc3\xbe",
'tilde;' => "\xcb\x9c",
'times' => "\xc3\x97",
'times;' => "\xc3\x97",
'trade;' => "\xe2\x84\xa2",
'uArr;' => "\xe2\x87\x91",
'uacute' => "\xc3\xba",
'uacute;' => "\xc3\xba",
'uarr;' => "\xe2\x86\x91",
'ucirc' => "\xc3\xbb",
'ucirc;' => "\xc3\xbb",
'ugrave' => "\xc3\xb9",
'ugrave;' => "\xc3\xb9",
'uml' => "\xc2\xa8",
'uml;' => "\xc2\xa8",
'upsih;' => "\xcf\x92",
'upsilon;' => "\xcf\x85",
'uuml' => "\xc3\xbc",
'uuml;' => "\xc3\xbc",
'weierp;' => "\xe2\x84\x98",
'xi;' => "\xce\xbe",
'yacute' => "\xc3\xbd",
'yacute;' => "\xc3\xbd",
'yen' => "\xc2\xa5",
'yen;' => "\xc2\xa5",
'yuml' => "\xc3\xbf",
'yuml;' => "\xc3\xbf",
'zeta;' => "\xce\xb6",
'zwj;' => "\xe2\x80\x8d",
'zwnj;' => "\xe2\x80\x8c"
}
# Lower-case character-encoding labels, as a flat Array of Strings.
# Order is significant only in that it is preserved from the original list;
# labels that look like aliases of one encoding are kept together on a line.
# Purely numeric labels such as 437 are Strings too (this is a %w literal).
ENCODINGS = %w[
  ansi_x3.4-1968 iso-ir-6 ansi_x3.4-1986 iso_646.irv:1991 ascii iso646-us
  us-ascii us ibm367 cp367 csascii
  ks_c_5601-1987 korean iso-2022-kr csiso2022kr euc-kr
  iso-2022-jp csiso2022jp iso-2022-jp-2
  iso-ir-58 chinese csiso58gb231280
  iso_8859-1:1987 iso-ir-100 iso_8859-1 iso-8859-1 latin1 l1 ibm819 cp819 csisolatin1
  iso_8859-2:1987 iso-ir-101 iso_8859-2 iso-8859-2 latin2 l2 csisolatin2
  iso_8859-3:1988 iso-ir-109 iso_8859-3 iso-8859-3 latin3 l3 csisolatin3
  iso_8859-4:1988 iso-ir-110 iso_8859-4 iso-8859-4 latin4 l4 csisolatin4
  iso_8859-6:1987 iso-ir-127 iso_8859-6 iso-8859-6 ecma-114 asmo-708 arabic csisolatinarabic
  iso_8859-7:1987 iso-ir-126 iso_8859-7 iso-8859-7 elot_928 ecma-118 greek greek8 csisolatingreek
  iso_8859-8:1988 iso-ir-138 iso_8859-8 iso-8859-8 hebrew csisolatinhebrew
  iso_8859-5:1988 iso-ir-144 iso_8859-5 iso-8859-5 cyrillic csisolatincyrillic
  iso_8859-9:1989 iso-ir-148 iso_8859-9 iso-8859-9 latin5 l5 csisolatin5
  iso-8859-10 iso-ir-157 l6 iso_8859-10:1992 csisolatin6 latin6
  hp-roman8 roman8 r8
  ibm037 cp037 csibm037
  ibm424 cp424 csibm424
  ibm437 cp437 437 cspc8codepage437
  ibm500 cp500 csibm500
  ibm775 cp775 cspc775baltic
  ibm850 cp850 850 cspc850multilingual
  ibm852 cp852 852 cspcp852
  ibm855 cp855 855 csibm855
  ibm857 cp857 857 csibm857
  ibm860 cp860 860 csibm860
  ibm861 cp861 861 cp-is csibm861
  ibm862 cp862 862 cspc862latinhebrew
  ibm863 cp863 863 csibm863
  ibm864 cp864 csibm864
  ibm865 cp865 865 csibm865
  ibm866 cp866 866 csibm866
  ibm869 cp869 869 cp-gr csibm869
  ibm1026 cp1026 csibm1026
  koi8-r cskoi8r
  koi8-u
  big5-hkscs
  ptcp154 csptcp154 pt154 cp154
  utf-7 utf-16be utf-16le utf-16 utf-8
  iso-8859-13
  iso-8859-14 iso-ir-199 iso_8859-14:1998 iso_8859-14 latin8 iso-celtic l8
  iso-8859-15 iso_8859-15
  iso-8859-16 iso-ir-226 iso_8859-16:2001 iso_8859-16 latin10 l10
  gbk cp936 ms936
  gb18030
  shift_jis ms_kanji csshiftjis
  euc-jp
  gb2312
  big5 csbig5
  windows-1250 windows-1251 windows-1252 windows-1253 windows-1254
  windows-1255 windows-1256 windows-1257 windows-1258
  tis-620
  hz-gb-2312
]
# E maps parse-error codes (hyphenated Strings) to their human-readable
# English messages. Every message is passed through _() when this table is
# built, so whatever _() returns is what gets stored.
#
# Messages may contain Python-style named placeholders such as %(name) or
# %(charAsInt)08x.
# NOTE(review): Ruby's Kernel#format uses %<name>s / %{name}, not %(name) --
# confirm how (and whether) callers substitute these before changing them.
#
# Fixed relative to the earlier table:
# * "illegal-codepoint-for-numeric-entity" read "codepoint=> " (a ':' that
#   had been rewritten to '=>'); it now reads "codepoint: ".
# * "unexpected-char-in-frameset" misspelled "Unexpected".
E = {
  "null-character" =>
    _("Null character in input stream, replaced with U+FFFD."),
  "incorrectly-placed-solidus" =>
    _("Solidus (/) incorrectly placed in tag."),
  "incorrect-cr-newline-entity" =>
    _("Incorrect CR newline entity, replaced with LF."),
  "illegal-windows-1252-entity" =>
    _("Entity used with illegal number (windows-1252 reference)."),
  "cant-convert-numeric-entity" =>
    _("Numeric entity couldn't be converted to character " +
      "(codepoint U+%(charAsInt)08x)."),
  "illegal-codepoint-for-numeric-entity" =>
    _("Numeric entity represents an illegal codepoint: " +
      "U+%(charAsInt)08x."),
  "numeric-entity-without-semicolon" =>
    _("Numeric entity didn't end with ';'."),
  "expected-numeric-entity-but-got-eof" =>
    _("Numeric entity expected. Got end of file instead."),
  "expected-numeric-entity" =>
    _("Numeric entity expected but none found."),
  "named-entity-without-semicolon" =>
    _("Named entity didn't end with ';'."),
  "expected-named-entity" =>
    _("Named entity expected. Got none."),
  "attributes-in-end-tag" =>
    _("End tag contains unexpected attributes."),
  "expected-tag-name-but-got-right-bracket" =>
    _("Expected tag name. Got '>' instead."),
  "expected-tag-name-but-got-question-mark" =>
    _("Expected tag name. Got '?' instead. (HTML doesn't " +
      "support processing instructions.)"),
  "expected-tag-name" =>
    _("Expected tag name. Got something else instead"),
  "expected-closing-tag-but-got-right-bracket" =>
    _("Expected closing tag. Got '>' instead. Ignoring '>'."),
  "expected-closing-tag-but-got-eof" =>
    _("Expected closing tag. Unexpected end of file."),
  "expected-closing-tag-but-got-char" =>
    _("Expected closing tag. Unexpected character '%(data)' found."),
  "eof-in-tag-name" =>
    _("Unexpected end of file in the tag name."),
  "expected-attribute-name-but-got-eof" =>
    _("Unexpected end of file. Expected attribute name instead."),
  "eof-in-attribute-name" =>
    _("Unexpected end of file in attribute name."),
  "duplicate-attribute" =>
    _("Dropped duplicate attribute on tag."),
  "expected-end-of-tag-name-but-got-eof" =>
    _("Unexpected end of file. Expected = or end of tag."),
  "expected-attribute-value-but-got-eof" =>
    _("Unexpected end of file. Expected attribute value."),
  "eof-in-attribute-value-double-quote" =>
    _("Unexpected end of file in attribute value (\")."),
  "eof-in-attribute-value-single-quote" =>
    _("Unexpected end of file in attribute value (')."),
  "eof-in-attribute-value-no-quotes" =>
    _("Unexpected end of file in attribute value."),
  "expected-dashes-or-doctype" =>
    _("Expected '--' or 'DOCTYPE'. Not found."),
  "incorrect-comment" =>
    _("Incorrect comment."),
  "eof-in-comment" =>
    _("Unexpected end of file in comment."),
  "eof-in-comment-end-dash" =>
    _("Unexpected end of file in comment (-)"),
  "unexpected-dash-after-double-dash-in-comment" =>
    _("Unexpected '-' after '--' found in comment."),
  "eof-in-comment-double-dash" =>
    _("Unexpected end of file in comment (--)."),
  "unexpected-char-in-comment" =>
    _("Unexpected character in comment found."),
  "need-space-after-doctype" =>
    _("No space after literal string 'DOCTYPE'."),
  "expected-doctype-name-but-got-right-bracket" =>
    _("Unexpected > character. Expected DOCTYPE name."),
  "expected-doctype-name-but-got-eof" =>
    _("Unexpected end of file. Expected DOCTYPE name."),
  "eof-in-doctype-name" =>
    _("Unexpected end of file in DOCTYPE name."),
  "eof-in-doctype" =>
    _("Unexpected end of file in DOCTYPE."),
  "expected-space-or-right-bracket-in-doctype" =>
    _("Expected space or '>'. Got '%(data)'"),
  "unexpected-end-of-doctype" =>
    _("Unexpected end of DOCTYPE."),
  "unexpected-char-in-doctype" =>
    _("Unexpected character in DOCTYPE."),
  "eof-in-bogus-doctype" =>
    _("Unexpected end of file in bogus doctype."),
  "eof-in-innerhtml" =>
    _("XXX innerHTML EOF"),
  "unexpected-doctype" =>
    _("Unexpected DOCTYPE. Ignored."),
  "non-html-root" =>
    _("html needs to be the first start tag."),
  "expected-doctype-but-got-eof" =>
    _("Unexpected End of file. Expected DOCTYPE."),
  "unknown-doctype" =>
    _("Erroneous DOCTYPE."),
  "expected-doctype-but-got-chars" =>
    _("Unexpected non-space characters. Expected DOCTYPE."),
  "expected-doctype-but-got-start-tag" =>
    _("Unexpected start tag (%(name)). Expected DOCTYPE."),
  "expected-doctype-but-got-end-tag" =>
    _("Unexpected end tag (%(name)). Expected DOCTYPE."),
  "end-tag-after-implied-root" =>
    _("Unexpected end tag (%(name)) after the (implied) root element."),
  "expected-named-closing-tag-but-got-eof" =>
    _("Unexpected end of file. Expected end tag (%(name))."),
  "two-heads-are-not-better-than-one" =>
    _("Unexpected start tag head in existing head. Ignored."),
  "unexpected-end-tag" =>
    _("Unexpected end tag (%(name)). Ignored."),
  "unexpected-start-tag-out-of-my-head" =>
    _("Unexpected start tag (%(name)) that can be in head. Moved."),
  "unexpected-start-tag" =>
    _("Unexpected start tag (%(name))."),
  "missing-end-tag" =>
    _("Missing end tag (%(name))."),
  "missing-end-tags" =>
    _("Missing end tags (%(name))."),
  "unexpected-start-tag-implies-end-tag" =>
    _("Unexpected start tag (%(startName)) " +
      "implies end tag (%(endName))."),
  "unexpected-start-tag-treated-as" =>
    _("Unexpected start tag (%(originalName)). Treated as %(newName)."),
  "deprecated-tag" =>
    _("Unexpected start tag %(name). Don't use it!"),
  "unexpected-start-tag-ignored" =>
    _("Unexpected start tag %(name). Ignored."),
  "expected-one-end-tag-but-got-another" =>
    _("Unexpected end tag (%(gotName)). " +
      "Missing end tag (%(expectedName))."),
  "end-tag-too-early" =>
    _("End tag (%(name)) seen too early. Expected other end tag."),
  "end-tag-too-early-named" =>
    _("Unexpected end tag (%(gotName)). Expected end tag (%(expectedName))."),
  "end-tag-too-early-ignored" =>
    _("End tag (%(name)) seen too early. Ignored."),
  "adoption-agency-1.1" =>
    _("End tag (%(name)) violates step 1, " +
      "paragraph 1 of the adoption agency algorithm."),
  "adoption-agency-1.2" =>
    _("End tag (%(name)) violates step 1, " +
      "paragraph 2 of the adoption agency algorithm."),
  "adoption-agency-1.3" =>
    _("End tag (%(name)) violates step 1, " +
      "paragraph 3 of the adoption agency algorithm."),
  "unexpected-end-tag-treated-as" =>
    _("Unexpected end tag (%(originalName)). Treated as %(newName)."),
  "no-end-tag" =>
    _("This element (%(name)) has no end tag."),
  "unexpected-implied-end-tag-in-table" =>
    _("Unexpected implied end tag (%(name)) in the table phase."),
  "unexpected-implied-end-tag-in-table-body" =>
    _("Unexpected implied end tag (%(name)) in the table body phase."),
  "unexpected-char-implies-table-voodoo" =>
    _("Unexpected non-space characters in " +
      "table context caused voodoo mode."),
  "unexpected-start-tag-implies-table-voodoo" =>
    _("Unexpected start tag (%(name)) in " +
      "table context caused voodoo mode."),
  "unexpected-end-tag-implies-table-voodoo" =>
    _("Unexpected end tag (%(name)) in " +
      "table context caused voodoo mode."),
  "unexpected-cell-in-table-body" =>
    _("Unexpected table cell start tag (%(name)) " +
      "in the table body phase."),
  "unexpected-cell-end-tag" =>
    _("Got table cell end tag (%(name)) " +
      "while required end tags are missing."),
  "unexpected-end-tag-in-table-body" =>
    _("Unexpected end tag (%(name)) in the table body phase. Ignored."),
  "unexpected-implied-end-tag-in-table-row" =>
    _("Unexpected implied end tag (%(name)) in the table row phase."),
  "unexpected-end-tag-in-table-row" =>
    _("Unexpected end tag (%(name)) in the table row phase. Ignored."),
  "unexpected-select-in-select" =>
    _("Unexpected select start tag in the select phase " +
      "implies select start tag."),
  "unexpected-start-tag-in-select" =>
    _("Unexpected start tag token (%(name)) in the select phase. " +
      "Ignored."),
  "unexpected-end-tag-in-select" =>
    _("Unexpected end tag (%(name)) in the select phase. Ignored."),
  "unexpected-char-after-body" =>
    _("Unexpected non-space characters in the after body phase."),
  "unexpected-start-tag-after-body" =>
    _("Unexpected start tag token (%(name))" +
      " in the after body phase."),
  "unexpected-end-tag-after-body" =>
    _("Unexpected end tag token (%(name))" +
      " in the after body phase."),
  "unexpected-char-in-frameset" =>
    _("Unexpected characters in the frameset phase. Characters ignored."),
  "unexpected-start-tag-in-frameset" =>
    _("Unexpected start tag token (%(name))" +
      " in the frameset phase. Ignored."),
  "unexpected-frameset-in-frameset-innerhtml" =>
    _("Unexpected end tag token (frameset) " +
      "in the frameset phase (innerHTML)."),
  "unexpected-end-tag-in-frameset" =>
    _("Unexpected end tag token (%(name))" +
      " in the frameset phase. Ignored."),
  "unexpected-char-after-frameset" =>
    _("Unexpected non-space characters in the " +
      "after frameset phase. Ignored."),
  "unexpected-start-tag-after-frameset" =>
    _("Unexpected start tag (%(name))" +
      " in the after frameset phase. Ignored."),
  "unexpected-end-tag-after-frameset" =>
    _("Unexpected end tag (%(name))" +
      " in the after frameset phase. Ignored."),
  "expected-eof-but-got-char" =>
    _("Unexpected non-space characters. Expected end of file."),
  "expected-eof-but-got-start-tag" =>
    _("Unexpected start tag (%(name))" +
      ". Expected end of file."),
  "expected-eof-but-got-end-tag" =>
    _("Unexpected end tag (%(name))" +
      ". Expected end of file."),
  "unexpected-end-table-in-caption" =>
    _("Unexpected end table tag in caption. Generates implied end caption.")
}
end