Sync with latest HTML5lib and latest Maruku
This commit is contained in:
parent
8e92e4a3ab
commit
8ccaad85a5
71 changed files with 1974 additions and 1621 deletions
817
vendor/plugins/HTML5lib/lib/html5/constants.rb
vendored
Executable file
817
vendor/plugins/HTML5lib/lib/html5/constants.rb
vendored
Executable file
|
@ -0,0 +1,817 @@
|
|||
module HTML5
|
||||
|
||||
# Exception class named EOF. It derives directly from Exception (not
# StandardError), so a bare `rescue` clause will not catch it.
class EOF < Exception
end
|
||||
|
||||
# Names of the content model flags, as symbols. Frozen so the shared list
# cannot be modified by accident.
CONTENT_MODEL_FLAGS = [
  :PCDATA,
  :RCDATA,
  :CDATA,
  :PLAINTEXT
].freeze
|
||||
|
||||
# Tag names classified as scoping elements (frozen list of strings).
SCOPING_ELEMENTS = %w[
  button
  caption
  html
  marquee
  object
  table
  td
  th
].freeze
|
||||
|
||||
# Tag names classified as formatting elements (frozen list of strings).
FORMATTING_ELEMENTS = %w[
  a
  b
  big
  em
  font
  i
  nobr
  s
  small
  strike
  strong
  tt
  u
].freeze
|
||||
|
||||
# Tag names classified as special elements (frozen list of strings, kept in
# the original alphabetical order).
SPECIAL_ELEMENTS = %w[
  address
  area
  base
  basefont
  bgsound
  blockquote
  body
  br
  center
  col
  colgroup
  dd
  dir
  div
  dl
  dt
  embed
  fieldset
  form
  frame
  frameset
  h1
  h2
  h3
  h4
  h5
  h6
  head
  hr
  iframe
  image
  img
  input
  isindex
  li
  link
  listing
  menu
  meta
  noembed
  noframes
  noscript
  ol
  optgroup
  option
  p
  param
  plaintext
  pre
  script
  select
  spacer
  style
  tbody
  textarea
  tfoot
  thead
  title
  tr
  ul
  wbr
].freeze
|
||||
|
||||
# The space characters, one single-character string each: tab, line feed,
# vertical tab, form feed, space and carriage return. Written as an explicit
# array so that the space entry does not depend on how %W treats "\x20".
SPACE_CHARACTERS = [
  "\t",
  "\n",
  "\x0B",
  "\x0C",
  "\x20",
  "\r"
].freeze
|
||||
|
||||
# Tag names of the table insert mode elements (frozen list of strings).
TABLE_INSERT_MODE_ELEMENTS = %w[
  table
  tbody
  tfoot
  thead
  tr
].freeze
|
||||
|
||||
# ASCII character classes.
# ASCII_LOWERCASE, ASCII_UPPERCASE and ASCII_LETTERS are Strings.
ASCII_LOWERCASE = ('a'..'z').to_a.join('').freeze
ASCII_UPPERCASE = ('A'..'Z').to_a.join('').freeze
ASCII_LETTERS = (ASCII_LOWERCASE + ASCII_UPPERCASE).freeze
# NOTE: DIGITS is a Range of one-character strings, not a String or Array.
DIGITS = ('0'..'9').freeze
# HEX_DIGITS is an Array: the decimal digits, then a-f, then A-F.
HEX_DIGITS = (DIGITS.to_a + ('a'..'f').to_a + ('A'..'F').to_a).freeze
|
||||
|
||||
# Heading elements need to be ordered (h1 first, h6 last).
HEADING_ELEMENTS = %w[
  h1
  h2
  h3
  h4
  h5
  h6
].freeze
|
||||
|
||||
# Tag names of the void elements (frozen list of strings).
# XXX What about event-source and command?
VOID_ELEMENTS = %w[
  base
  link
  meta
  hr
  br
  img
  embed
  param
  area
  col
  input
].freeze
|
||||
|
||||
# Tag names listed as CDATA elements (frozen list of strings).
CDATA_ELEMENTS = %w[title textarea].freeze
|
||||
|
||||
# Tag names listed as RCDATA elements (frozen list of strings).
RCDATA_ELEMENTS = %w[
  style
  script
  xmp
  iframe
  noembed
  noframes
  noscript
].freeze
|
||||
|
||||
# Boolean attribute names, keyed by element name (String). The Symbol key
# :global holds the entry that is not tied to a single element name.
# The Hash itself is frozen; the value arrays are left as they were.
BOOLEAN_ATTRIBUTES = {
  :global    => %w[irrelevant],
  'style'    => %w[scoped],
  'img'      => %w[ismap],
  'audio'    => %w[autoplay controls],
  'video'    => %w[autoplay controls],
  'script'   => %w[defer async],
  'details'  => %w[open],
  'datagrid' => %w[multiple disabled],
  'command'  => %w[hidden disabled checked default],
  'menu'     => %w[autosubmit],
  'fieldset' => %w[disabled readonly],
  'option'   => %w[disabled readonly selected],
  'optgroup' => %w[disabled readonly],
  'button'   => %w[disabled autofocus],
  'input'    => %w[disabled readonly required autofocus checked ismap],
  'select'   => %w[disabled readonly autofocus multiple],
  'output'   => %w[disabled readonly]
}.freeze
|
||||
|
||||
# ENTITIES_WINDOWS1252 has to be _ordered_ and needs to have an index:
# per the per-entry comments, entry i is the code point for Windows-1252
# byte 0x80 + i. Bytes marked UNDEFINED carry 65533 (U+FFFD).
ENTITIES_WINDOWS1252 = [
  8364,  # 0x80  0x20AC  EURO SIGN
  65533, # 0x81          UNDEFINED
  8218,  # 0x82  0x201A  SINGLE LOW-9 QUOTATION MARK
  402,   # 0x83  0x0192  LATIN SMALL LETTER F WITH HOOK
  8222,  # 0x84  0x201E  DOUBLE LOW-9 QUOTATION MARK
  8230,  # 0x85  0x2026  HORIZONTAL ELLIPSIS
  8224,  # 0x86  0x2020  DAGGER
  8225,  # 0x87  0x2021  DOUBLE DAGGER
  710,   # 0x88  0x02C6  MODIFIER LETTER CIRCUMFLEX ACCENT
  8240,  # 0x89  0x2030  PER MILLE SIGN
  352,   # 0x8A  0x0160  LATIN CAPITAL LETTER S WITH CARON
  8249,  # 0x8B  0x2039  SINGLE LEFT-POINTING ANGLE QUOTATION MARK
  338,   # 0x8C  0x0152  LATIN CAPITAL LIGATURE OE
  65533, # 0x8D          UNDEFINED
  381,   # 0x8E  0x017D  LATIN CAPITAL LETTER Z WITH CARON
  65533, # 0x8F          UNDEFINED
  65533, # 0x90          UNDEFINED
  8216,  # 0x91  0x2018  LEFT SINGLE QUOTATION MARK
  8217,  # 0x92  0x2019  RIGHT SINGLE QUOTATION MARK
  8220,  # 0x93  0x201C  LEFT DOUBLE QUOTATION MARK
  8221,  # 0x94  0x201D  RIGHT DOUBLE QUOTATION MARK
  8226,  # 0x95  0x2022  BULLET
  8211,  # 0x96  0x2013  EN DASH
  8212,  # 0x97  0x2014  EM DASH
  732,   # 0x98  0x02DC  SMALL TILDE
  8482,  # 0x99  0x2122  TRADE MARK SIGN
  353,   # 0x9A  0x0161  LATIN SMALL LETTER S WITH CARON
  8250,  # 0x9B  0x203A  SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
  339,   # 0x9C  0x0153  LATIN SMALL LIGATURE OE
  65533, # 0x9D          UNDEFINED
  382,   # 0x9E  0x017E  LATIN SMALL LETTER Z WITH CARON
  376    # 0x9F  0x0178  LATIN CAPITAL LETTER Y WITH DIAERESIS
].freeze
|
||||
|
||||
# ENTITIES was generated from Python using the following code:
|
||||
#
|
||||
# import constants
|
||||
# entities = constants.entities.items()
|
||||
# entities.sort()
|
||||
# list = [ ' '.join([repr(entity), '=>', ord(value)<128 and
|
||||
# repr(str(value)) or repr(value.encode('utf-8')).replace("'",'"')])
|
||||
# for entity, value in entities]
|
||||
# print ' ENTITIES = {\n ' + ',\n '.join(list) + '\n }'
|
||||
|
||||
ENTITIES = {
|
||||
'AElig' => "\xc3\x86",
|
||||
'AElig;' => "\xc3\x86",
|
||||
'AMP' => '&',
|
||||
'AMP;' => '&',
|
||||
'Aacute' => "\xc3\x81",
|
||||
'Aacute;' => "\xc3\x81",
|
||||
'Acirc' => "\xc3\x82",
|
||||
'Acirc;' => "\xc3\x82",
|
||||
'Agrave' => "\xc3\x80",
|
||||
'Agrave;' => "\xc3\x80",
|
||||
'Alpha;' => "\xce\x91",
|
||||
'Aring' => "\xc3\x85",
|
||||
'Aring;' => "\xc3\x85",
|
||||
'Atilde' => "\xc3\x83",
|
||||
'Atilde;' => "\xc3\x83",
|
||||
'Auml' => "\xc3\x84",
|
||||
'Auml;' => "\xc3\x84",
|
||||
'Beta;' => "\xce\x92",
|
||||
'COPY' => "\xc2\xa9",
|
||||
'COPY;' => "\xc2\xa9",
|
||||
'Ccedil' => "\xc3\x87",
|
||||
'Ccedil;' => "\xc3\x87",
|
||||
'Chi;' => "\xce\xa7",
|
||||
'Dagger;' => "\xe2\x80\xa1",
|
||||
'Delta;' => "\xce\x94",
|
||||
'ETH' => "\xc3\x90",
|
||||
'ETH;' => "\xc3\x90",
|
||||
'Eacute' => "\xc3\x89",
|
||||
'Eacute;' => "\xc3\x89",
|
||||
'Ecirc' => "\xc3\x8a",
|
||||
'Ecirc;' => "\xc3\x8a",
|
||||
'Egrave' => "\xc3\x88",
|
||||
'Egrave;' => "\xc3\x88",
|
||||
'Epsilon;' => "\xce\x95",
|
||||
'Eta;' => "\xce\x97",
|
||||
'Euml' => "\xc3\x8b",
|
||||
'Euml;' => "\xc3\x8b",
|
||||
'GT' => '>',
|
||||
'GT;' => '>',
|
||||
'Gamma;' => "\xce\x93",
|
||||
'Iacute' => "\xc3\x8d",
|
||||
'Iacute;' => "\xc3\x8d",
|
||||
'Icirc' => "\xc3\x8e",
|
||||
'Icirc;' => "\xc3\x8e",
|
||||
'Igrave' => "\xc3\x8c",
|
||||
'Igrave;' => "\xc3\x8c",
|
||||
'Iota;' => "\xce\x99",
|
||||
'Iuml' => "\xc3\x8f",
|
||||
'Iuml;' => "\xc3\x8f",
|
||||
'Kappa;' => "\xce\x9a",
|
||||
'LT' => '<',
|
||||
'LT;' => '<',
|
||||
'Lambda;' => "\xce\x9b",
|
||||
'Mu;' => "\xce\x9c",
|
||||
'Ntilde' => "\xc3\x91",
|
||||
'Ntilde;' => "\xc3\x91",
|
||||
'Nu;' => "\xce\x9d",
|
||||
'OElig;' => "\xc5\x92",
|
||||
'Oacute' => "\xc3\x93",
|
||||
'Oacute;' => "\xc3\x93",
|
||||
'Ocirc' => "\xc3\x94",
|
||||
'Ocirc;' => "\xc3\x94",
|
||||
'Ograve' => "\xc3\x92",
|
||||
'Ograve;' => "\xc3\x92",
|
||||
'Omega;' => "\xce\xa9",
|
||||
'Omicron;' => "\xce\x9f",
|
||||
'Oslash' => "\xc3\x98",
|
||||
'Oslash;' => "\xc3\x98",
|
||||
'Otilde' => "\xc3\x95",
|
||||
'Otilde;' => "\xc3\x95",
|
||||
'Ouml' => "\xc3\x96",
|
||||
'Ouml;' => "\xc3\x96",
|
||||
'Phi;' => "\xce\xa6",
|
||||
'Pi;' => "\xce\xa0",
|
||||
'Prime;' => "\xe2\x80\xb3",
|
||||
'Psi;' => "\xce\xa8",
|
||||
'QUOT' => '"',
|
||||
'QUOT;' => '"',
|
||||
'REG' => "\xc2\xae",
|
||||
'REG;' => "\xc2\xae",
|
||||
'Rho;' => "\xce\xa1",
|
||||
'Scaron;' => "\xc5\xa0",
|
||||
'Sigma;' => "\xce\xa3",
|
||||
'THORN' => "\xc3\x9e",
|
||||
'THORN;' => "\xc3\x9e",
|
||||
'TRADE;' => "\xe2\x84\xa2",
|
||||
'Tau;' => "\xce\xa4",
|
||||
'Theta;' => "\xce\x98",
|
||||
'Uacute' => "\xc3\x9a",
|
||||
'Uacute;' => "\xc3\x9a",
|
||||
'Ucirc' => "\xc3\x9b",
|
||||
'Ucirc;' => "\xc3\x9b",
|
||||
'Ugrave' => "\xc3\x99",
|
||||
'Ugrave;' => "\xc3\x99",
|
||||
'Upsilon;' => "\xce\xa5",
|
||||
'Uuml' => "\xc3\x9c",
|
||||
'Uuml;' => "\xc3\x9c",
|
||||
'Xi;' => "\xce\x9e",
|
||||
'Yacute' => "\xc3\x9d",
|
||||
'Yacute;' => "\xc3\x9d",
|
||||
'Yuml;' => "\xc5\xb8",
|
||||
'Zeta;' => "\xce\x96",
|
||||
'aacute' => "\xc3\xa1",
|
||||
'aacute;' => "\xc3\xa1",
|
||||
'acirc' => "\xc3\xa2",
|
||||
'acirc;' => "\xc3\xa2",
|
||||
'acute' => "\xc2\xb4",
|
||||
'acute;' => "\xc2\xb4",
|
||||
'aelig' => "\xc3\xa6",
|
||||
'aelig;' => "\xc3\xa6",
|
||||
'agrave' => "\xc3\xa0",
|
||||
'agrave;' => "\xc3\xa0",
|
||||
'alefsym;' => "\xe2\x84\xb5",
|
||||
'alpha;' => "\xce\xb1",
|
||||
'amp' => '&',
|
||||
'amp;' => '&',
|
||||
'and;' => "\xe2\x88\xa7",
|
||||
'ang;' => "\xe2\x88\xa0",
|
||||
'apos;' => "'",
|
||||
'aring' => "\xc3\xa5",
|
||||
'aring;' => "\xc3\xa5",
|
||||
'asymp;' => "\xe2\x89\x88",
|
||||
'atilde' => "\xc3\xa3",
|
||||
'atilde;' => "\xc3\xa3",
|
||||
'auml' => "\xc3\xa4",
|
||||
'auml;' => "\xc3\xa4",
|
||||
'bdquo;' => "\xe2\x80\x9e",
|
||||
'beta;' => "\xce\xb2",
|
||||
'brvbar' => "\xc2\xa6",
|
||||
'brvbar;' => "\xc2\xa6",
|
||||
'bull;' => "\xe2\x80\xa2",
|
||||
'cap;' => "\xe2\x88\xa9",
|
||||
'ccedil' => "\xc3\xa7",
|
||||
'ccedil;' => "\xc3\xa7",
|
||||
'cedil' => "\xc2\xb8",
|
||||
'cedil;' => "\xc2\xb8",
|
||||
'cent' => "\xc2\xa2",
|
||||
'cent;' => "\xc2\xa2",
|
||||
'chi;' => "\xcf\x87",
|
||||
'circ;' => "\xcb\x86",
|
||||
'clubs;' => "\xe2\x99\xa3",
|
||||
'cong;' => "\xe2\x89\x85",
|
||||
'copy' => "\xc2\xa9",
|
||||
'copy;' => "\xc2\xa9",
|
||||
'crarr;' => "\xe2\x86\xb5",
|
||||
'cup;' => "\xe2\x88\xaa",
|
||||
'curren' => "\xc2\xa4",
|
||||
'curren;' => "\xc2\xa4",
|
||||
'dArr;' => "\xe2\x87\x93",
|
||||
'dagger;' => "\xe2\x80\xa0",
|
||||
'darr;' => "\xe2\x86\x93",
|
||||
'deg' => "\xc2\xb0",
|
||||
'deg;' => "\xc2\xb0",
|
||||
'delta;' => "\xce\xb4",
|
||||
'diams;' => "\xe2\x99\xa6",
|
||||
'divide' => "\xc3\xb7",
|
||||
'divide;' => "\xc3\xb7",
|
||||
'eacute' => "\xc3\xa9",
|
||||
'eacute;' => "\xc3\xa9",
|
||||
'ecirc' => "\xc3\xaa",
|
||||
'ecirc;' => "\xc3\xaa",
|
||||
'egrave' => "\xc3\xa8",
|
||||
'egrave;' => "\xc3\xa8",
|
||||
'empty;' => "\xe2\x88\x85",
|
||||
'emsp;' => "\xe2\x80\x83",
|
||||
'ensp;' => "\xe2\x80\x82",
|
||||
'epsilon;' => "\xce\xb5",
|
||||
'equiv;' => "\xe2\x89\xa1",
|
||||
'eta;' => "\xce\xb7",
|
||||
'eth' => "\xc3\xb0",
|
||||
'eth;' => "\xc3\xb0",
|
||||
'euml' => "\xc3\xab",
|
||||
'euml;' => "\xc3\xab",
|
||||
'euro;' => "\xe2\x82\xac",
|
||||
'exist;' => "\xe2\x88\x83",
|
||||
'fnof;' => "\xc6\x92",
|
||||
'forall;' => "\xe2\x88\x80",
|
||||
'frac12' => "\xc2\xbd",
|
||||
'frac12;' => "\xc2\xbd",
|
||||
'frac14' => "\xc2\xbc",
|
||||
'frac14;' => "\xc2\xbc",
|
||||
'frac34' => "\xc2\xbe",
|
||||
'frac34;' => "\xc2\xbe",
|
||||
'frasl;' => "\xe2\x81\x84",
|
||||
'gamma;' => "\xce\xb3",
|
||||
'ge;' => "\xe2\x89\xa5",
|
||||
'gt' => '>',
|
||||
'gt;' => '>',
|
||||
'hArr;' => "\xe2\x87\x94",
|
||||
'harr;' => "\xe2\x86\x94",
|
||||
'hearts;' => "\xe2\x99\xa5",
|
||||
'hellip;' => "\xe2\x80\xa6",
|
||||
'iacute' => "\xc3\xad",
|
||||
'iacute;' => "\xc3\xad",
|
||||
'icirc' => "\xc3\xae",
|
||||
'icirc;' => "\xc3\xae",
|
||||
'iexcl' => "\xc2\xa1",
|
||||
'iexcl;' => "\xc2\xa1",
|
||||
'igrave' => "\xc3\xac",
|
||||
'igrave;' => "\xc3\xac",
|
||||
'image;' => "\xe2\x84\x91",
|
||||
'infin;' => "\xe2\x88\x9e",
|
||||
'int;' => "\xe2\x88\xab",
|
||||
'iota;' => "\xce\xb9",
|
||||
'iquest' => "\xc2\xbf",
|
||||
'iquest;' => "\xc2\xbf",
|
||||
'isin;' => "\xe2\x88\x88",
|
||||
'iuml' => "\xc3\xaf",
|
||||
'iuml;' => "\xc3\xaf",
|
||||
'kappa;' => "\xce\xba",
|
||||
'lArr;' => "\xe2\x87\x90",
|
||||
'lambda;' => "\xce\xbb",
|
||||
'lang;' => "\xe3\x80\x88",
|
||||
'laquo' => "\xc2\xab",
|
||||
'laquo;' => "\xc2\xab",
|
||||
'larr;' => "\xe2\x86\x90",
|
||||
'lceil;' => "\xe2\x8c\x88",
|
||||
'ldquo;' => "\xe2\x80\x9c",
|
||||
'le;' => "\xe2\x89\xa4",
|
||||
'lfloor;' => "\xe2\x8c\x8a",
|
||||
'lowast;' => "\xe2\x88\x97",
|
||||
'loz;' => "\xe2\x97\x8a",
|
||||
'lrm;' => "\xe2\x80\x8e",
|
||||
'lsaquo;' => "\xe2\x80\xb9",
|
||||
'lsquo;' => "\xe2\x80\x98",
|
||||
'lt' => '<',
|
||||
'lt;' => '<',
|
||||
'macr' => "\xc2\xaf",
|
||||
'macr;' => "\xc2\xaf",
|
||||
'mdash;' => "\xe2\x80\x94",
|
||||
'micro' => "\xc2\xb5",
|
||||
'micro;' => "\xc2\xb5",
|
||||
'middot' => "\xc2\xb7",
|
||||
'middot;' => "\xc2\xb7",
|
||||
'minus;' => "\xe2\x88\x92",
|
||||
'mu;' => "\xce\xbc",
|
||||
'nabla;' => "\xe2\x88\x87",
|
||||
'nbsp' => "\xc2\xa0",
|
||||
'nbsp;' => "\xc2\xa0",
|
||||
'ndash;' => "\xe2\x80\x93",
|
||||
'ne;' => "\xe2\x89\xa0",
|
||||
'ni;' => "\xe2\x88\x8b",
|
||||
'not' => "\xc2\xac",
|
||||
'not;' => "\xc2\xac",
|
||||
'notin;' => "\xe2\x88\x89",
|
||||
'nsub;' => "\xe2\x8a\x84",
|
||||
'ntilde' => "\xc3\xb1",
|
||||
'ntilde;' => "\xc3\xb1",
|
||||
'nu;' => "\xce\xbd",
|
||||
'oacute' => "\xc3\xb3",
|
||||
'oacute;' => "\xc3\xb3",
|
||||
'ocirc' => "\xc3\xb4",
|
||||
'ocirc;' => "\xc3\xb4",
|
||||
'oelig;' => "\xc5\x93",
|
||||
'ograve' => "\xc3\xb2",
|
||||
'ograve;' => "\xc3\xb2",
|
||||
'oline;' => "\xe2\x80\xbe",
|
||||
'omega;' => "\xcf\x89",
|
||||
'omicron;' => "\xce\xbf",
|
||||
'oplus;' => "\xe2\x8a\x95",
|
||||
'or;' => "\xe2\x88\xa8",
|
||||
'ordf' => "\xc2\xaa",
|
||||
'ordf;' => "\xc2\xaa",
|
||||
'ordm' => "\xc2\xba",
|
||||
'ordm;' => "\xc2\xba",
|
||||
'oslash' => "\xc3\xb8",
|
||||
'oslash;' => "\xc3\xb8",
|
||||
'otilde' => "\xc3\xb5",
|
||||
'otilde;' => "\xc3\xb5",
|
||||
'otimes;' => "\xe2\x8a\x97",
|
||||
'ouml' => "\xc3\xb6",
|
||||
'ouml;' => "\xc3\xb6",
|
||||
'para' => "\xc2\xb6",
|
||||
'para;' => "\xc2\xb6",
|
||||
'part;' => "\xe2\x88\x82",
|
||||
'permil;' => "\xe2\x80\xb0",
|
||||
'perp;' => "\xe2\x8a\xa5",
|
||||
'phi;' => "\xcf\x86",
|
||||
'pi;' => "\xcf\x80",
|
||||
'piv;' => "\xcf\x96",
|
||||
'plusmn' => "\xc2\xb1",
|
||||
'plusmn;' => "\xc2\xb1",
|
||||
'pound' => "\xc2\xa3",
|
||||
'pound;' => "\xc2\xa3",
|
||||
'prime;' => "\xe2\x80\xb2",
|
||||
'prod;' => "\xe2\x88\x8f",
|
||||
'prop;' => "\xe2\x88\x9d",
|
||||
'psi;' => "\xcf\x88",
|
||||
'quot' => '"',
|
||||
'quot;' => '"',
|
||||
'rArr;' => "\xe2\x87\x92",
|
||||
'radic;' => "\xe2\x88\x9a",
|
||||
'rang;' => "\xe3\x80\x89",
|
||||
'raquo' => "\xc2\xbb",
|
||||
'raquo;' => "\xc2\xbb",
|
||||
'rarr;' => "\xe2\x86\x92",
|
||||
'rceil;' => "\xe2\x8c\x89",
|
||||
'rdquo;' => "\xe2\x80\x9d",
|
||||
'real;' => "\xe2\x84\x9c",
|
||||
'reg' => "\xc2\xae",
|
||||
'reg;' => "\xc2\xae",
|
||||
'rfloor;' => "\xe2\x8c\x8b",
|
||||
'rho;' => "\xcf\x81",
|
||||
'rlm;' => "\xe2\x80\x8f",
|
||||
'rsaquo;' => "\xe2\x80\xba",
|
||||
'rsquo;' => "\xe2\x80\x99",
|
||||
'sbquo;' => "\xe2\x80\x9a",
|
||||
'scaron;' => "\xc5\xa1",
|
||||
'sdot;' => "\xe2\x8b\x85",
|
||||
'sect' => "\xc2\xa7",
|
||||
'sect;' => "\xc2\xa7",
|
||||
'shy' => "\xc2\xad",
|
||||
'shy;' => "\xc2\xad",
|
||||
'sigma;' => "\xcf\x83",
|
||||
'sigmaf;' => "\xcf\x82",
|
||||
'sim;' => "\xe2\x88\xbc",
|
||||
'spades;' => "\xe2\x99\xa0",
|
||||
'sub;' => "\xe2\x8a\x82",
|
||||
'sube;' => "\xe2\x8a\x86",
|
||||
'sum;' => "\xe2\x88\x91",
|
||||
'sup1' => "\xc2\xb9",
|
||||
'sup1;' => "\xc2\xb9",
|
||||
'sup2' => "\xc2\xb2",
|
||||
'sup2;' => "\xc2\xb2",
|
||||
'sup3' => "\xc2\xb3",
|
||||
'sup3;' => "\xc2\xb3",
|
||||
'sup;' => "\xe2\x8a\x83",
|
||||
'supe;' => "\xe2\x8a\x87",
|
||||
'szlig' => "\xc3\x9f",
|
||||
'szlig;' => "\xc3\x9f",
|
||||
'tau;' => "\xcf\x84",
|
||||
'there4;' => "\xe2\x88\xb4",
|
||||
'theta;' => "\xce\xb8",
|
||||
'thetasym;' => "\xcf\x91",
|
||||
'thinsp;' => "\xe2\x80\x89",
|
||||
'thorn' => "\xc3\xbe",
|
||||
'thorn;' => "\xc3\xbe",
|
||||
'tilde;' => "\xcb\x9c",
|
||||
'times' => "\xc3\x97",
|
||||
'times;' => "\xc3\x97",
|
||||
'trade;' => "\xe2\x84\xa2",
|
||||
'uArr;' => "\xe2\x87\x91",
|
||||
'uacute' => "\xc3\xba",
|
||||
'uacute;' => "\xc3\xba",
|
||||
'uarr;' => "\xe2\x86\x91",
|
||||
'ucirc' => "\xc3\xbb",
|
||||
'ucirc;' => "\xc3\xbb",
|
||||
'ugrave' => "\xc3\xb9",
|
||||
'ugrave;' => "\xc3\xb9",
|
||||
'uml' => "\xc2\xa8",
|
||||
'uml;' => "\xc2\xa8",
|
||||
'upsih;' => "\xcf\x92",
|
||||
'upsilon;' => "\xcf\x85",
|
||||
'uuml' => "\xc3\xbc",
|
||||
'uuml;' => "\xc3\xbc",
|
||||
'weierp;' => "\xe2\x84\x98",
|
||||
'xi;' => "\xce\xbe",
|
||||
'yacute' => "\xc3\xbd",
|
||||
'yacute;' => "\xc3\xbd",
|
||||
'yen' => "\xc2\xa5",
|
||||
'yen;' => "\xc2\xa5",
|
||||
'yuml' => "\xc3\xbf",
|
||||
'yuml;' => "\xc3\xbf",
|
||||
'zeta;' => "\xce\xb6",
|
||||
'zwj;' => "\xe2\x80\x8d",
|
||||
'zwnj;' => "\xe2\x80\x8c"
|
||||
}
|
||||
|
||||
ENCODINGS = %w[
|
||||
ansi_x3.4-1968
|
||||
iso-ir-6
|
||||
ansi_x3.4-1986
|
||||
iso_646.irv:1991
|
||||
ascii
|
||||
iso646-us
|
||||
us-ascii
|
||||
us
|
||||
ibm367
|
||||
cp367
|
||||
csascii
|
||||
ks_c_5601-1987
|
||||
korean
|
||||
iso-2022-kr
|
||||
csiso2022kr
|
||||
euc-kr
|
||||
iso-2022-jp
|
||||
csiso2022jp
|
||||
iso-2022-jp-2
|
||||
iso-ir-58
|
||||
chinese
|
||||
csiso58gb231280
|
||||
iso_8859-1:1987
|
||||
iso-ir-100
|
||||
iso_8859-1
|
||||
iso-8859-1
|
||||
latin1
|
||||
l1
|
||||
ibm819
|
||||
cp819
|
||||
csisolatin1
|
||||
iso_8859-2:1987
|
||||
iso-ir-101
|
||||
iso_8859-2
|
||||
iso-8859-2
|
||||
latin2
|
||||
l2
|
||||
csisolatin2
|
||||
iso_8859-3:1988
|
||||
iso-ir-109
|
||||
iso_8859-3
|
||||
iso-8859-3
|
||||
latin3
|
||||
l3
|
||||
csisolatin3
|
||||
iso_8859-4:1988
|
||||
iso-ir-110
|
||||
iso_8859-4
|
||||
iso-8859-4
|
||||
latin4
|
||||
l4
|
||||
csisolatin4
|
||||
iso_8859-6:1987
|
||||
iso-ir-127
|
||||
iso_8859-6
|
||||
iso-8859-6
|
||||
ecma-114
|
||||
asmo-708
|
||||
arabic
|
||||
csisolatinarabic
|
||||
iso_8859-7:1987
|
||||
iso-ir-126
|
||||
iso_8859-7
|
||||
iso-8859-7
|
||||
elot_928
|
||||
ecma-118
|
||||
greek
|
||||
greek8
|
||||
csisolatingreek
|
||||
iso_8859-8:1988
|
||||
iso-ir-138
|
||||
iso_8859-8
|
||||
iso-8859-8
|
||||
hebrew
|
||||
csisolatinhebrew
|
||||
iso_8859-5:1988
|
||||
iso-ir-144
|
||||
iso_8859-5
|
||||
iso-8859-5
|
||||
cyrillic
|
||||
csisolatincyrillic
|
||||
iso_8859-9:1989
|
||||
iso-ir-148
|
||||
iso_8859-9
|
||||
iso-8859-9
|
||||
latin5
|
||||
l5
|
||||
csisolatin5
|
||||
iso-8859-10
|
||||
iso-ir-157
|
||||
l6
|
||||
iso_8859-10:1992
|
||||
csisolatin6
|
||||
latin6
|
||||
hp-roman8
|
||||
roman8
|
||||
r8
|
||||
ibm037
|
||||
cp037
|
||||
csibm037
|
||||
ibm424
|
||||
cp424
|
||||
csibm424
|
||||
ibm437
|
||||
cp437
|
||||
437
|
||||
cspc8codepage437
|
||||
ibm500
|
||||
cp500
|
||||
csibm500
|
||||
ibm775
|
||||
cp775
|
||||
cspc775baltic
|
||||
ibm850
|
||||
cp850
|
||||
850
|
||||
cspc850multilingual
|
||||
ibm852
|
||||
cp852
|
||||
852
|
||||
cspcp852
|
||||
ibm855
|
||||
cp855
|
||||
855
|
||||
csibm855
|
||||
ibm857
|
||||
cp857
|
||||
857
|
||||
csibm857
|
||||
ibm860
|
||||
cp860
|
||||
860
|
||||
csibm860
|
||||
ibm861
|
||||
cp861
|
||||
861
|
||||
cp-is
|
||||
csibm861
|
||||
ibm862
|
||||
cp862
|
||||
862
|
||||
cspc862latinhebrew
|
||||
ibm863
|
||||
cp863
|
||||
863
|
||||
csibm863
|
||||
ibm864
|
||||
cp864
|
||||
csibm864
|
||||
ibm865
|
||||
cp865
|
||||
865
|
||||
csibm865
|
||||
ibm866
|
||||
cp866
|
||||
866
|
||||
csibm866
|
||||
ibm869
|
||||
cp869
|
||||
869
|
||||
cp-gr
|
||||
csibm869
|
||||
ibm1026
|
||||
cp1026
|
||||
csibm1026
|
||||
koi8-r
|
||||
cskoi8r
|
||||
koi8-u
|
||||
big5-hkscs
|
||||
ptcp154
|
||||
csptcp154
|
||||
pt154
|
||||
cp154
|
||||
utf-7
|
||||
utf-16be
|
||||
utf-16le
|
||||
utf-16
|
||||
utf-8
|
||||
iso-8859-13
|
||||
iso-8859-14
|
||||
iso-ir-199
|
||||
iso_8859-14:1998
|
||||
iso_8859-14
|
||||
latin8
|
||||
iso-celtic
|
||||
l8
|
||||
iso-8859-15
|
||||
iso_8859-15
|
||||
iso-8859-16
|
||||
iso-ir-226
|
||||
iso_8859-16:2001
|
||||
iso_8859-16
|
||||
latin10
|
||||
l10
|
||||
gbk
|
||||
cp936
|
||||
ms936
|
||||
gb18030
|
||||
shift_jis
|
||||
ms_kanji
|
||||
csshiftjis
|
||||
euc-jp
|
||||
gb2312
|
||||
big5
|
||||
csbig5
|
||||
windows-1250
|
||||
windows-1251
|
||||
windows-1252
|
||||
windows-1253
|
||||
windows-1254
|
||||
windows-1255
|
||||
windows-1256
|
||||
windows-1257
|
||||
windows-1258
|
||||
tis-620
|
||||
hz-gb-2312
|
||||
]
|
||||
|
||||
end
|
1
vendor/plugins/HTML5lib/lib/html5/filters.rb
vendored
Normal file
1
vendor/plugins/HTML5lib/lib/html5/filters.rb
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
require 'html5/filters/optionaltags'
|
10
vendor/plugins/HTML5lib/lib/html5/filters/base.rb
vendored
Normal file
10
vendor/plugins/HTML5lib/lib/html5/filters/base.rb
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
require 'delegate'
|
||||
require 'enumerator'
|
||||
|
||||
module HTML5
  module Filters
    # Common superclass for filters. A filter wraps another object through
    # SimpleDelegator (the wrapped object is reachable via __getobj__) and
    # mixes in Enumerable, so a subclass only has to define #each to get
    # map/select/to_a and friends.
    class Base < SimpleDelegator
      include Enumerable
    end
  end
end
|
85
vendor/plugins/HTML5lib/lib/html5/filters/inject_meta_charset.rb
vendored
Normal file
85
vendor/plugins/HTML5lib/lib/html5/filters/inject_meta_charset.rb
vendored
Normal file
|
@ -0,0 +1,85 @@
|
|||
require 'html5/filters/base'
|
||||
|
||||
module HTML5
|
||||
module Filters
|
||||
class InjectMetaCharset < Base
|
||||
# source   - the object to wrap; handed to the superclass constructor.
# encoding - value stored in @encoding and later written into the
#            <meta> charset by #each; nil makes #each treat the meta
#            as already present.
def initialize(source, encoding)
  super(source)
  @encoding = encoding
end
|
||||
|
||||
# Yields every token of the wrapped stream, making sure the <head> carries
# a charset declaration for @encoding:
#
# * an existing <meta charset=...> attribute (name matched
#   case-insensitively) is replaced by ["charset", @encoding];
# * otherwise, a <meta http-equiv="content-type" content=...> gets its
#   content value rewritten to "text/html; charset=<encoding>";
# * otherwise a new <meta charset> token is emitted right after the <head>
#   start tag (an empty <head/> is expanded to start tag, meta, end tag).
#
# Tokens seen inside <head> are buffered until the head end tag so that
# the meta can be placed first. Tokens are Hashes with :type, :name and
# :data; tag :data is indexed as an Array of [name, value] pairs.
#
# When @encoding is nil the stream is passed through untouched.
def each
  # Nothing to declare: never rewrite existing attributes to nil.
  if @encoding.nil?
    __getobj__.each { |token| yield token }
    return
  end

  state = :pre_head
  meta_found = false
  pending = []
  # Builds a fresh token each time so no Hash is shared between yields.
  new_meta = lambda do
    {:type => :EmptyTag, :name => "meta", :data => [["charset", @encoding]]}
  end

  __getobj__.each do |token|
    case token[:type]
    when :StartTag
      state = :in_head if token[:name].downcase == "head"

    when :EmptyTag
      if token[:name].downcase == "meta"
        # Replace charset with the actual encoding, remembering where an
        # http-equiv/content pair lives in case there is no charset.
        has_http_equiv_content_type = false
        content_index = -1
        token[:data].each_with_index do |(name, value), i|
          if name.downcase == 'charset'
            token[:data][i] = ['charset', @encoding]
            meta_found = true
          elsif name == 'http-equiv' && value.downcase == 'content-type'
            has_http_equiv_content_type = true
          elsif name == 'content'
            content_index = i
          end
        end

        if !meta_found && has_http_equiv_content_type && content_index >= 0
          token[:data][content_index][1] =
            'text/html; charset=%s' % @encoding
          meta_found = true
        end

      elsif token[:name].downcase == "head" && !meta_found
        # Insert meta into an empty head.
        yield(:type => :StartTag, :name => "head", :data => token[:data])
        yield new_meta.call
        yield(:type => :EndTag, :name => "head")
        meta_found = true
        next
      end

    when :EndTag
      if token[:name].downcase == "head" && pending.any?
        # Insert meta into head (if necessary) and flush the pending queue.
        # pending[0] is the <head> start tag itself.
        yield pending.shift
        yield new_meta.call unless meta_found
        yield pending.shift while pending.any?
        meta_found = true
        state = :post_head
      end
    end

    if state == :in_head
      pending << token
    else
      yield token
    end
  end

  # The stream ended while still inside <head> (no head end tag was seen):
  # flush the buffered tokens instead of silently dropping them.
  unless pending.empty?
    yield pending.shift
    yield new_meta.call unless meta_found
    yield pending.shift until pending.empty?
  end
end
|
||||
end
|
||||
end
|
||||
end
|
199
vendor/plugins/HTML5lib/lib/html5/filters/optionaltags.rb
vendored
Normal file
199
vendor/plugins/HTML5lib/lib/html5/filters/optionaltags.rb
vendored
Normal file
|
@ -0,0 +1,199 @@
|
|||
require 'html5/constants'
|
||||
require 'html5/filters/base'
|
||||
|
||||
module HTML5
|
||||
module Filters
|
||||
|
||||
class OptionalTagFilter < Base
|
||||
# Walks the wrapped token stream with a three-token window and yields
# (previous, current, next) once for every token:
# previous is nil for the first token and next is nil for the last one.
# An empty stream yields nothing.
def slider
  previous1 = previous2 = nil
  __getobj__.each do |token|
    # The window is only complete once a "current" token is available.
    yield previous2, previous1, token unless previous1.nil?
    previous2 = previous1
    previous1 = token
  end
  # Emit the final token (it has no successor). Skipped for an empty stream
  # so callers never receive nil as the current token.
  yield previous2, previous1, nil unless previous1.nil?
end
|
||||
|
||||
# Yields the tokens delivered by #slider, dropping tags that may be omitted:
# a :StartTag is dropped only when it has no attributes (empty :data) and
# is_optional_start agrees; an :EndTag is dropped when is_optional_end
# agrees. Every other token type is passed through unchanged.
def each
  slider do |previous, token, nexttok|
    case token[:type]
    when :StartTag
      # A start tag carrying attributes can never be omitted.
      omit = token[:data].empty? && is_optional_start(token[:name], previous, nexttok)
      yield token unless omit
    when :EndTag
      yield token unless is_optional_end(token[:name], nexttok)
    else
      yield token
    end
  end
end
|
||||
|
||||
# Decides whether the start tag of +tagname+ may be left out.
#
# tagname  - name of the element whose start tag is being considered.
# previous - the token before the start tag, or nil at the stream start.
# nexttok  - the token after the start tag, or nil at the stream end.
#
# Tokens are Hashes read through :type and :name. Returns true when the
# start tag may be omitted, false otherwise (always false for tag names
# other than html, head, body, colgroup and tbody).
def is_optional_start(tagname, previous, nexttok)
  type = nexttok ? nexttok[:type] : nil

  case tagname
  when 'html'
    # An html element's start tag may be omitted if the first thing
    # inside the html element is not a space character or a comment.
    ![:Comment, :SpaceCharacters].include?(type)

  when 'head'
    # A head element's start tag may be omitted if the first thing
    # inside the head element is an element.
    type == :StartTag

  when 'body'
    # A body element's start tag may be omitted if the first thing
    # inside the body element is not a space character or a comment,
    # except if the first thing inside the body element is a script
    # or style element and the node immediately preceding the body
    # element is a head element whose end tag has been omitted.
    if [:Comment, :SpaceCharacters].include?(type)
      false
    elsif type == :StartTag
      # XXX: we do not look at the preceding event, so we never omit
      # the body element's start tag if it's followed by a script or
      # a style element.
      !%w[script style].include?(nexttok[:name])
    else
      true
    end

  when 'colgroup'
    # A colgroup element's start tag may be omitted if the first thing
    # inside the colgroup element is a col element, and if the element
    # is not immediately preceded by another colgroup element whose
    # end tag has been omitted.
    # XXX: we do not look at the preceding event, so instead we never
    # omit the colgroup element's end tag when it is immediately
    # followed by another colgroup element. See is_optional_end.
    type == :StartTag && nexttok[:name] == "col"

  when 'tbody'
    # A tbody element's start tag may be omitted if the first thing
    # inside the tbody element is a tr element, and if the element is
    # not immediately preceded by a tbody, thead, or tfoot element
    # whose end tag has been omitted.
    return false unless type == :StartTag

    # is_optional_end may drop the end tag of a tbody/thead/tfoot that is
    # followed by a tbody, so keep this start tag whenever the previous
    # token is such an end tag.
    if previous && previous[:type] == :EndTag &&
       %w[tbody thead tfoot].include?(previous[:name])
      return false
    end

    nexttok[:name] == 'tr'

  else
    false
  end
end
|
||||
|
||||
# Decides whether the end tag of +tagname+ may be left out.
#
# tagname - name of the element whose end tag is being considered.
# nexttok - the token after the end tag (a Hash read through :type and
#           :name), or nil at the end of the stream.
#
# "No more content in the parent element" is detected as: the next token
# is an :EndTag, or there is no next token. Returns true when the end tag
# may be omitted, false otherwise (always false for unlisted tag names).
def is_optional_end(tagname, nexttok)
  type = nexttok ? nexttok[:type] : nil
  next_is_start = (type == :StartTag)
  parent_done = (type == :EndTag || type.nil?)

  case tagname
  when 'html', 'head', 'body'
    # The end tag may be omitted if the element is not immediately
    # followed by a space character or a comment.
    ![:Comment, :SpaceCharacters].include?(type)

  when 'li', 'optgroup', 'option', 'tr'
    # The end tag may be omitted if the element is immediately followed
    # by another element of the same kind, or if there is no more content
    # in the parent element.
    next_is_start ? nexttok[:name] == tagname : parent_done

  when 'dt', 'dd'
    # A dt element's end tag may be omitted if the dt element is
    # immediately followed by another dt element or a dd element.
    # A dd element's end tag may be omitted if the dd element is
    # immediately followed by another dd element or a dt element,
    # or if there is no more content in the parent element.
    if next_is_start
      %w[dt dd].include?(nexttok[:name])
    elsif tagname == 'dd'
      parent_done
    else
      false
    end

  when 'p'
    # A p element's end tag may be omitted if the p element is
    # immediately followed by an address, blockquote, dl, fieldset,
    # form, h1, h2, h3, h4, h5, h6, hr, menu, ol, p, pre, table,
    # or ul element, or if there is no more content in the parent
    # element.
    if next_is_start
      %w[address blockquote dl fieldset form h1 h2 h3 h4 h5
         h6 hr menu ol p pre table ul].include?(nexttok[:name])
    else
      parent_done
    end

  when 'colgroup'
    # A colgroup element's end tag may be omitted if the colgroup
    # element is not immediately followed by a space character or
    # a comment.
    if [:Comment, :SpaceCharacters].include?(type)
      false
    elsif next_is_start
      # XXX: we also look for an immediately following colgroup
      # element. See is_optional_start.
      nexttok[:name] != 'colgroup'
    else
      true
    end

  when 'thead', 'tbody'
    # A thead element's end tag may be omitted if the thead element
    # is immediately followed by a tbody or tfoot element.
    # A tbody element's end tag may be omitted if the tbody element
    # is immediately followed by a tbody or tfoot element, or if
    # there is no more content in the parent element.
    # The start tag of a following tbody is then kept by
    # is_optional_start, which checks the preceding end tag.
    if next_is_start
      %w[tbody tfoot].include?(nexttok[:name])
    elsif tagname == 'tbody'
      parent_done
    else
      false
    end

  when 'tfoot'
    # A tfoot element's end tag may be omitted if the tfoot element
    # is immediately followed by a tbody element, or if there is no
    # more content in the parent element.
    next_is_start ? nexttok[:name] == 'tbody' : parent_done

  when 'td', 'th'
    # A td or th element's end tag may be omitted if the element is
    # immediately followed by a td or th element, or if there is
    # no more content in the parent element.
    next_is_start ? %w[td th].include?(nexttok[:name]) : parent_done

  else
    false
  end
end
|
||||
end
|
||||
end
|
||||
end
|
15
vendor/plugins/HTML5lib/lib/html5/filters/sanitizer.rb
vendored
Normal file
15
vendor/plugins/HTML5lib/lib/html5/filters/sanitizer.rb
vendored
Normal file
|
@ -0,0 +1,15 @@
|
|||
require 'html5/filters/base'
|
||||
require 'html5/sanitizer'
|
||||
|
||||
module HTML5
  module Filters
    # Token-stream filter that passes every token of the underlying stream
    # through HTMLSanitizeModule#sanitize_token before yielding it.
    class HTMLSanitizeFilter < Base
      include HTMLSanitizeModule

      # Yields the sanitized version of each token produced by the object
      # returned from __getobj__.
      def each
        __getobj__.each { |token| yield(sanitize_token(token)) }
      end
    end
  end
end
|
36
vendor/plugins/HTML5lib/lib/html5/filters/whitespace.rb
vendored
Normal file
36
vendor/plugins/HTML5lib/lib/html5/filters/whitespace.rb
vendored
Normal file
|
@ -0,0 +1,36 @@
|
|||
require 'html5/constants'
|
||||
require 'html5/filters/base'
|
||||
|
||||
module HTML5
  module Filters
    # Token-stream filter that collapses whitespace everywhere except inside
    # elements listed in SPACE_PRESERVE_ELEMENTS.
    class WhitespaceFilter < Base

      # Elements whose descendants keep their whitespace untouched.
      SPACE_PRESERVE_ELEMENTS = %w[pre textarea] + RCDATA_ELEMENTS
      # Matches one or more consecutive space characters.
      SPACES = /[#{SPACE_CHARACTERS.join('')}]+/m

      # Yields each token of the underlying stream. Outside preserving
      # elements, :SpaceCharacters tokens are dropped and every whitespace run
      # inside a :Characters token is replaced by a single space.
      def each
        # Nesting depth inside whitespace-preserving elements; 0 means that
        # whitespace is currently being collapsed.
        preserve = 0
        __getobj__.each do |token|
          case token[:type]
          when :StartTag
            # Once inside a preserving element, every nested start tag is
            # counted so the matching end tags bring the depth back to 0.
            if preserve > 0 || SPACE_PRESERVE_ELEMENTS.include?(token[:name])
              preserve += 1
            end

          when :EndTag
            preserve -= 1 if preserve > 0

          when :SpaceCharacters
            next if preserve == 0

          when :Characters
            # gsub, not sub: every run of whitespace must be collapsed, not
            # just the first one found in the token.
            token[:data] = token[:data].gsub(SPACES, ' ') if preserve == 0
          end

          yield token
        end
      end
    end
  end
end
|
||||
|
246
vendor/plugins/HTML5lib/lib/html5/html5parser.rb
vendored
Normal file
246
vendor/plugins/HTML5lib/lib/html5/html5parser.rb
vendored
Normal file
|
@ -0,0 +1,246 @@
|
|||
require 'html5/constants'
|
||||
require 'html5/tokenizer'
|
||||
require 'html5/treebuilders/rexml'
|
||||
|
||||
Dir.glob(File.join(File.dirname(__FILE__), 'html5parser', '*_phase.rb')).each do |path|
|
||||
require 'html5/html5parser/' + File.basename(path)
|
||||
end
|
||||
|
||||
module HTML5

  # Error in parsed document
  class ParseError < Exception; end
  class AssertionError < Exception; end

  # HTML parser. Generates a tree structure from a stream of (possibly
  # malformed) HTML.
  class HTMLParser

    attr_accessor :phase, :firstStartTag, :innerHTML, :lastPhase, :insertFromTable

    attr_reader :phases, :tokenizer, :tree, :errors

    # Convenience entry point: builds a parser from +options+ and parses
    # +stream+ as a full document. The :encoding option is forwarded to #parse;
    # the remaining options go to #initialize. The caller's hash is not
    # modified.
    def self.parse(stream, options = {})
      options = options.dup
      encoding = options.delete(:encoding)
      new(options).parse(stream, encoding)
    end

    # Convenience entry point for fragments. :container (default 'div') and
    # :encoding are forwarded to #parseFragment; the remaining options go to
    # #initialize. The caller's hash is not modified.
    def self.parseFragment(stream, options = {})
      options = options.dup
      container = options.delete(:container) || 'div'
      encoding = options.delete(:encoding)
      new(options).parseFragment(stream, container, encoding)
    end

    @@phases = %w( initial rootElement beforeHead inHead afterHead inBody inTable inCaption
      inColumnGroup inTableBody inRow inCell inSelect afterBody inFrameset afterFrameset trailingEnd )

    # :strict - raise an exception when a parse error is encountered
    # :tree - a treebuilder class controlling the type of tree that will be
    # returned. Built in treebuilders can be accessed through
    # HTML5::TreeBuilders[treeType]
    def initialize(options = {})
      @strict = false
      @errors = []

      @tokenizer = HTMLTokenizer
      @tree = TreeBuilders::REXML::TreeBuilder

      # Every option overrides the instance variable of the same name.
      options.each { |name, value| instance_variable_set("@#{name}", value) }

      @tree = @tree.new

      # One phase object per name, e.g. :inBody => InBodyPhase.new(self, @tree)
      @phases = @@phases.inject({}) do |phases, phase_name|
        phase_class_name = phase_name.sub(/(.)/) { $1.upcase } + 'Phase'
        phases[phase_name.to_sym] = HTML5.const_get(phase_class_name).new(self, @tree)
        phases
      end
    end

    # Shared driver behind #parse and #parseFragment: resets the tree, builds a
    # tokenizer over +stream+, picks the starting phase and feeds every token
    # to the current phase. +innerHTML+ is true for fragment parsing, in which
    # case +container+ names the context element.
    def _parse(stream, innerHTML, encoding, container = 'div')
      @tree.reset
      @firstStartTag = false
      @errors = []

      # After a previous parse @tokenizer holds a tokenizer instance; go back
      # to its class so that a fresh tokenizer is created for this stream.
      @tokenizer = @tokenizer.class unless Class === @tokenizer
      @tokenizer = @tokenizer.new(stream, :encoding => encoding,
                                  :parseMeta => !innerHTML)

      if innerHTML
        case @innerHTML = container.downcase
        when 'title', 'textarea'
          @tokenizer.contentModelFlag = :RCDATA
        when 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', 'noscript'
          @tokenizer.contentModelFlag = :CDATA
        when 'plaintext'
          @tokenizer.contentModelFlag = :PLAINTEXT
        else
          # contentModelFlag already is PCDATA
          #@tokenizer.contentModelFlag = :PCDATA
        end

        @phase = @phases[:rootElement]
        @phase.insertHtmlElement
        resetInsertionMode
      else
        @innerHTML = false
        @phase = @phases[:initial]
      end

      # We only seem to have InBodyPhase testcases where the following is
      # relevant ... need others too
      @lastPhase = nil

      # XXX This is temporary for the moment so there isn't any other
      # changes needed for the parser to work with the iterable tokenizer
      @tokenizer.each do |token|
        token = normalizeToken(token)

        method = 'process%s' % token[:type]

        case token[:type]
        when :Characters, :SpaceCharacters, :Comment
          @phase.send method, token[:data]
        when :StartTag
          @phase.send method, token[:name], token[:data]
        when :EndTag
          @phase.send method, token[:name]
        when :Doctype
          @phase.send method, token[:name], token[:publicId],
                      token[:systemId], token[:correct]
        else
          # Any other token type is recorded as a parse error.
          parseError(token[:data])
        end
      end

      # When the loop finishes it's EOF
      @phase.processEOF
    end

    # Parse a HTML document into a well-formed tree
    #
    # stream - a filelike object or string containing the HTML to be parsed
    #
    # The optional encoding parameter must be a string that indicates
    # the encoding. If specified, that encoding will be used,
    # regardless of any BOM or later declaration (such as in a meta
    # element)
    def parse(stream, encoding=nil)
      _parse(stream, false, encoding)
      return @tree.getDocument
    end

    # Parse a HTML fragment into a well-formed tree fragment
    #
    # container - name of the element we're setting the innerHTML property
    # if set to nil, default to 'div'
    #
    # stream - a filelike object or string containing the HTML to be parsed
    #
    # The optional encoding parameter must be a string that indicates
    # the encoding. If specified, that encoding will be used,
    # regardless of any BOM or later declaration (such as in a meta
    # element)
    def parseFragment(stream, container='div', encoding=nil)
      _parse(stream, true, encoding, container)
      return @tree.getFragment
    end

    # Records a parse error as [stream position, message] in #errors and, in
    # strict mode, raises ParseError carrying the same message.
    def parseError(data = 'XXX ERROR MESSAGE NEEDED')
      # XXX The idea is to make data mandatory.
      @errors.push([@tokenizer.stream.position, data])
      raise ParseError, data if @strict
    end

    # HTML5 specific normalizations to the token stream
    def normalizeToken(token)

      if token[:type] == :EmptyTag
        # When a solidus (/) is encountered within a tag name what happens
        # depends on whether the current tag name matches that of a void
        # element. If it matches a void element atheists did the wrong
        # thing and if it doesn't it's wrong for everyone.

        unless VOID_ELEMENTS.include?(token[:name])
          parseError(_('Solidus (/) incorrectly placed in tag.'))
        end

        token[:type] = :StartTag
      end

      if token[:type] == :StartTag
        token[:name] = token[:name].tr(ASCII_UPPERCASE, ASCII_LOWERCASE)

        # We need to remove the duplicate attributes and convert attributes
        # to a dict so that [["x", "y"], ["x", "z"]] becomes {"x": "y"}

        unless token[:data].empty?
          # Walking the pairs backwards lets the first occurrence of a
          # duplicated attribute overwrite the later ones.
          attributes = {}
          token[:data].reverse.each do |attr, value|
            attributes[attr.tr(ASCII_UPPERCASE, ASCII_LOWERCASE)] = value
          end
          token[:data] = attributes
        end

      elsif token[:type] == :EndTag
        parseError(_('End tag contains unexpected attributes.')) unless token[:data].empty?
        # ASCII-only lowercasing, same as for start tags, so that start and
        # end tag names with non-ASCII letters still compare equal.
        token[:name] = token[:name].tr(ASCII_UPPERCASE, ASCII_LOWERCASE)
      end

      return token
    end

    # Element name => phase to switch to when that element is the nearest
    # relevant open element (see resetInsertionMode).
    @@new_modes = {
      'select' => :inSelect,
      'td' => :inCell,
      'th' => :inCell,
      'tr' => :inRow,
      'tbody' => :inTableBody,
      'thead' => :inTableBody,
      'tfoot' => :inTableBody,
      'caption' => :inCaption,
      'colgroup' => :inColumnGroup,
      'table' => :inTable,
      'head' => :inBody,
      'body' => :inBody,
      'frameset' => :inFrameset
    }

    # Chooses the current phase from the stack of open elements, walking it
    # from the innermost element outwards.
    def resetInsertionMode
      # The name of this method is mostly historical. (It's also used in the
      # specification.)
      last = false

      @tree.openElements.reverse.each do |node|
        nodeName = node.name

        if node == @tree.openElements[0]
          last = true
          unless ['td', 'th'].include?(nodeName)
            # XXX
            # assert @innerHTML
            nodeName = @innerHTML
          end
        end

        # 'select', 'colgroup', 'head' and 'frameset' should only be reached
        # here in the innerHTML case.
        # XXX
        # assert @innerHTML

        if @@new_modes.has_key?(nodeName)
          @phase = @phases[@@new_modes[nodeName]]
        elsif nodeName == 'html'
          @phase = @phases[@tree.headPointer.nil? ? :beforeHead : :afterHead]
        elsif last
          @phase = @phases[:inBody]
        else
          next
        end

        break
      end
    end

    # Message hook; currently returns the string unchanged.
    def _(string); string; end
  end

end
|
46
vendor/plugins/HTML5lib/lib/html5/html5parser/after_body_phase.rb
vendored
Normal file
46
vendor/plugins/HTML5lib/lib/html5/html5parser/after_body_phase.rb
vendored
Normal file
|
@ -0,0 +1,46 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Phase the parser is in after the body has been closed (InBodyPhase switches
  # to it on </body>). Anything other than comments and </html> is reported as
  # a parse error and handed back to the inBody phase.
  class AfterBodyPhase < Phase

    handle_end 'html'

    # Comments seen here are attached to the first open element (the <html>
    # element) rather than to whatever is currently open.
    def processComment(data)
      root = @tree.openElements[0]
      @tree.insertComment(data, root)
    end

    # Character data: parse error, then reprocess in the inBody phase.
    def processCharacters(data)
      @parser.parseError(_('Unexpected non-space characters in the after body phase.'))
      @parser.phase = @parser.phases[:inBody]
      @parser.phase.processCharacters(data)
    end

    # Any start tag: parse error, then reprocess in the inBody phase.
    def processStartTag(name, attributes)
      @parser.parseError(_("Unexpected start tag token (#{name}) in the after body phase."))
      @parser.phase = @parser.phases[:inBody]
      @parser.phase.processStartTag(name, attributes)
    end

    # </html>: a parse error when parsing a fragment; otherwise remember the
    # current phase and move on to the trailingEnd phase.
    def endTagHtml(name)
      return @parser.parseError if @parser.innerHTML

      # XXX: This may need to be done, not sure
      # Don't set lastPhase to the current phase but to the inBody phase
      # instead. No need for extra parse errors if there's something after </html>.
      # Try "<!doctype html>X</html>X" for instance.
      @parser.lastPhase = @parser.phase
      @parser.phase = @parser.phases[:trailingEnd]
    end

    # Any other end tag: parse error, then reprocess in the inBody phase.
    def endTagOther(name)
      @parser.parseError(_("Unexpected end tag token (#{name}) in the after body phase."))
      @parser.phase = @parser.phases[:inBody]
      @parser.phase.processEndTag(name)
    end

  end
end
|
34
vendor/plugins/HTML5lib/lib/html5/html5parser/after_frameset_phase.rb
vendored
Normal file
34
vendor/plugins/HTML5lib/lib/html5/html5parser/after_frameset_phase.rb
vendored
Normal file
|
@ -0,0 +1,34 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Phase used after a frameset has been closed. Everything except <noframes>
  # and </html> is reported as a parse error and ignored.
  class AfterFramesetPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#after3

    handle_start 'html', 'noframes'

    handle_end 'html'

    # Character data is reported and ignored.
    def processCharacters(data)
      @parser.parseError(_('Unexpected non-space characters in the after frameset phase. Ignored.'))
    end

    # <noframes> is handled by the inBody phase without changing the current phase.
    def startTagNoframes(name, attributes)
      in_body = @parser.phases[:inBody]
      in_body.processStartTag(name, attributes)
    end

    # Any other start tag is reported and ignored.
    def startTagOther(name, attributes)
      @parser.parseError(_("Unexpected start tag (#{name}) in the after frameset phase. Ignored."))
    end

    # </html>: remember the current phase and switch to the trailingEnd phase.
    def endTagHtml(name)
      @parser.lastPhase = @parser.phase
      @parser.phase = @parser.phases[:trailingEnd]
    end

    # Any other end tag is reported and ignored.
    def endTagOther(name)
      @parser.parseError(_("Unexpected end tag (#{name}) in the after frameset phase. Ignored."))
    end

  end
end
|
50
vendor/plugins/HTML5lib/lib/html5/html5parser/after_head_phase.rb
vendored
Normal file
50
vendor/plugins/HTML5lib/lib/html5/html5parser/after_head_phase.rb
vendored
Normal file
|
@ -0,0 +1,50 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Phase between the head and the body/frameset. Unless an explicit <body> or
  # <frameset> arrives, a <body> element is inserted implicitly and the token
  # is reprocessed by the phase that is current afterwards.
  class AfterHeadPhase < Phase

    handle_start 'html', 'body', 'frameset', %w( base link meta script style title ) => 'FromHead'

    # End of input: imply <body>, then let the new phase handle EOF.
    def processEOF
      anythingElse
      @parser.phase.processEOF
    end

    # Character data: imply <body>, then reprocess.
    def processCharacters(data)
      anythingElse
      @parser.phase.processCharacters(data)
    end

    # Explicit <body>: insert it and continue in the inBody phase.
    def startTagBody(name, attributes)
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inBody]
    end

    # Explicit <frameset>: insert it and continue in the inFrameset phase.
    def startTagFrameset(name, attributes)
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inFrameset]
    end

    # Head-only elements after the head: parse error, then switch to the
    # inHead phase and reprocess the tag there.
    def startTagFromHead(name, attributes)
      @parser.parseError(_("Unexpected start tag (#{name}) that can be in head. Moved."))
      @parser.phase = @parser.phases[:inHead]
      @parser.phase.processStartTag(name, attributes)
    end

    # Any other start tag: imply <body>, then reprocess.
    def startTagOther(name, attributes)
      anythingElse
      @parser.phase.processStartTag(name, attributes)
    end

    # Any end tag: imply <body>, then reprocess.
    def processEndTag(name)
      anythingElse
      @parser.phase.processEndTag(name)
    end

    # Inserts an attribute-less <body> element and switches to the inBody phase.
    def anythingElse
      @tree.insertElement('body', {})
      @parser.phase = @parser.phases[:inBody]
    end

  end
end
|
41
vendor/plugins/HTML5lib/lib/html5/html5parser/before_head_phase.rb
vendored
Normal file
41
vendor/plugins/HTML5lib/lib/html5/html5parser/before_head_phase.rb
vendored
Normal file
|
@ -0,0 +1,41 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Phase before the <head> element has been seen. Most tokens cause a <head>
  # element to be inserted implicitly, after which the token is reprocessed by
  # the phase that is current afterwards.
  class BeforeHeadPhase < Phase

    handle_start 'html', 'head'

    handle_end %w( html head body br p ) => 'ImplyHead'

    # End of input: imply <head>, then let the new phase handle EOF.
    def processEOF
      startTagHead('head', {})
      @parser.phase.processEOF
    end

    # Character data: imply <head>, then reprocess.
    def processCharacters(data)
      startTagHead('head', {})
      @parser.phase.processCharacters(data)
    end

    # Inserts the head element, records it as the tree's head pointer and
    # switches to the inHead phase.
    def startTagHead(name, attributes)
      @tree.insertElement(name, attributes)
      head = @tree.openElements[-1]
      @tree.headPointer = head
      @parser.phase = @parser.phases[:inHead]
    end

    # Any other start tag: imply <head>, then reprocess.
    def startTagOther(name, attributes)
      startTagHead('head', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # </html>, </head>, </body>, </br>, </p>: imply <head>, then reprocess.
    def endTagImplyHead(name)
      startTagHead('head', {})
      @parser.phase.processEndTag(name)
    end

    # Any other end tag is reported as a parse error.
    def endTagOther(name)
      @parser.parseError(_("Unexpected end tag (#{name}) after the (implied) root element."))
    end

  end
end
|
592
vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb
vendored
Normal file
592
vendor/plugins/HTML5lib/lib/html5/html5parser/in_body_phase.rb
vendored
Normal file
|
@ -0,0 +1,592 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
|
||||
class InBodyPhase < Phase
|
||||
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-body
|
||||
|
||||
# --- start tag handler registrations ---------------------------------------
handle_start 'html'
handle_start %w[base link meta script style] => 'ProcessInHead'
handle_start 'title'

handle_start 'body', 'form', 'plaintext', 'a', 'button', 'xmp', 'table', 'hr', 'image'

handle_start 'input', 'textarea', 'select', 'isindex', %w[marquee object]

handle_start %w[li dd dt] => 'ListItem'

handle_start %w[address blockquote center dir div dl fieldset listing menu ol p pre ul] => 'CloseP'

handle_start %w[b big em font i s small strike strong tt u] => 'Formatting'
handle_start 'nobr'

handle_start %w[area basefont bgsound br embed img param spacer wbr] => 'VoidFormatting'

handle_start %w[iframe noembed noframes noscript] => 'Cdata', HEADING_ELEMENTS => 'Heading'

handle_start %w[caption col colgroup frame frameset head option optgroup tbody td tfoot th thead tr] => 'Misplaced'

handle_start %w[event-source section nav article aside header footer datagrid command] => 'New'

# --- end tag handler registrations -----------------------------------------
handle_end 'p', 'body', 'html', 'form', %w[button marquee object], %w[dd dt li] => 'ListItem'

handle_end %w[address blockquote center div dl fieldset listing menu ol pre ul] => 'Block'

handle_end HEADING_ELEMENTS => 'Heading'

handle_end %w[a b big em font i nobr s small strike strong tt u] => 'Formatting'

handle_end %w[head frameset select optgroup option table caption colgroup col thead tfoot tbody tr td th] => 'Misplaced'

handle_end 'br'

handle_end %w[area basefont bgsound embed hr image img input isindex param spacer wbr frame] => 'None'

handle_end %w[noframes noscript noembed textarea xmp iframe] => 'CdataTextAreaXmp'

handle_end %w[event-source section nav article aside header footer datagrid command] => 'New'
|
||||
|
||||
# Sets up the phase; the arguments are passed on unchanged to the superclass.
def initialize(parser, tree)
  super

  # When true, the next run of space characters goes through
  # processSpaceCharactersDropNewline (special whitespace handling in <pre>).
  @processSpaceCharactersDropNewline = false
end
|
||||
|
||||
# Inserts +data+, dropping one leading newline when the current node is a
# still-empty <pre> or <textarea>. Clears the drop-newline flag first, so the
# special handling applies to a single run of space characters only.
def processSpaceCharactersDropNewline(data)
  @processSpaceCharactersDropNewline = false

  if data.length > 0 && data[0] == ?\n
    current = @tree.openElements[-1]
    if %w[pre textarea].include?(current.name) && !current.hasContent
      data = data[1..-1]
    end
  end

  @tree.insertText(data) if data.length > 0
end
|
||||
|
||||
# Space characters use the newline-dropping variant while the flag is set and
# the inherited handling otherwise.
def processSpaceCharacters(data)
  return processSpaceCharactersDropNewline(data) if @processSpaceCharactersDropNewline

  super(data)
end
|
||||
|
||||
# Non-space character data: reconstruct the active formatting elements, then
# insert the text.
def processCharacters(data)
  # XXX The specification says to reconstruct for every character, but
  # apparently that doesn't match the real world, so space characters
  # are not routed through here.
  @tree.reconstructActiveFormattingElements
  @tree.insertText(data)
end
|
||||
|
||||
# <base>, <link>, <meta>, <script>, <style>: delegate to the inHead phase
# without changing the current phase.
def startTagProcessInHead(name, attributes)
  in_head = @parser.phases[:inHead]
  in_head.processStartTag(name, attributes)
end
|
||||
|
||||
# <title> in the body: parse error, then delegate to the inHead phase
# without changing the current phase.
def startTagTitle(name, attributes)
  @parser.parseError(_("Unexpected start tag (#{name}) that belongs in the head. Moved."))
  in_head = @parser.phases[:inHead]
  in_head.processStartTag(name, attributes)
end
|
||||
|
||||
# A second <body>: always a parse error. When the second open element is the
# body, attributes it does not have yet are copied onto it; otherwise we must
# be parsing a fragment.
def startTagBody(name, attributes)
  @parser.parseError(_('Unexpected start tag (body).'))

  open_elements = @tree.openElements
  if open_elements.length == 1 || open_elements[1].name != 'body'
    assert @parser.innerHTML
  else
    attributes.each do |attr, value|
      next if @tree.openElements[1].attributes.has_key?(attr)
      @tree.openElements[1].attributes[attr] = value
    end
  end
end
|
||||
|
||||
# Block-level start tags: close an open <p> in scope, insert the element, and
# arm the newline-dropping whitespace handling for <pre>.
def startTagCloseP(name, attributes)
  endTagP('p') if in_scope?('p')
  @tree.insertElement(name, attributes)
  if name == 'pre'
    @processSpaceCharactersDropNewline = true
  end
end
|
||||
|
||||
# <form>: ignored with a parse error while a form pointer is set; otherwise
# closes an open <p>, inserts the form and records it as the form pointer.
def startTagForm(name, attributes)
  return @parser.parseError(_('Unexpected start tag (form). Ignored.')) if @tree.formPointer

  endTagP('p') if in_scope?('p')
  @tree.insertElement(name, attributes)
  @tree.formPointer = @tree.openElements[-1]
end
|
||||
|
||||
# <li>, <dd>, <dt>: closes an open <p>, then walks the open elements from the
# innermost outwards. If an element that the new item implicitly ends is
# found, everything down to and including it is popped (with a parse error
# when more than that one element was popped). The walk stops early at a
# special or scoping element other than <address> and <div>.
def startTagListItem(name, attributes)
  endTagP('p') if in_scope?('p')

  closers = {'li' => ['li'], 'dd' => ['dd', 'dt'], 'dt' => ['dd', 'dt']}
  stop_names = closers[name]

  @tree.openElements.reverse.each_with_index do |node, depth|
    if stop_names.include?(node.name)
      popped = Array.new(depth + 1) { @tree.openElements.pop }
      if depth >= 1
        plural = depth > 1 ? 's' : ''
        names = popped.reverse.map { |item| item.name }.join(', ')
        @parser.parseError(_("Missing end tag%s (%s)" % [plural, names]))
      end
      break
    end

    # Phrasing elements are all non special, non scoping, non
    # formatting elements
    barrier = (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
    break if barrier && !['address', 'div'].include?(node.name)
  end

  # The new element is inserted regardless of what was popped.
  @tree.insertElement(name, attributes)
end
|
||||
|
||||
# <plaintext>: close an open <p>, insert the element and switch the tokenizer
# to the PLAINTEXT content model.
def startTagPlaintext(name, attributes)
  if in_scope?('p')
    endTagP('p')
  end
  @tree.insertElement(name, attributes)
  @parser.tokenizer.contentModelFlag = :PLAINTEXT
end
|
||||
|
||||
# Heading start tags: close an open <p> in scope and insert the heading.
def startTagHeading(name, attributes)
  if in_scope?('p')
    endTagP('p')
  end

  # Uncomment the following for IE7 behavior:
  # HEADING_ELEMENTS.each do |element|
  #   if in_scope?(element)
  #     @parser.parseError(_("Unexpected start tag (#{name})."))
  #
  #     remove_open_elements_until do |element|
  #       HEADING_ELEMENTS.include?(element.name)
  #     end
  #
  #     break
  #   end
  # end
  @tree.insertElement(name, attributes)
end
|
||||
|
||||
# <a>: if an <a> is already among the active formatting elements, it is
# closed first (parse error) and removed from both element lists should it
# still be there. Then the new <a> is added as a formatting element.
def startTagA(name, attributes)
  open_anchor = @tree.elementInActiveFormattingElements('a')
  if open_anchor
    @parser.parseError(_('Unexpected start tag (a) implies end tag (a).'))
    endTagFormatting('a')
    if @tree.openElements.include?(open_anchor)
      @tree.openElements.delete(open_anchor)
    end
    if @tree.activeFormattingElements.include?(open_anchor)
      @tree.activeFormattingElements.delete(open_anchor)
    end
  end
  @tree.reconstructActiveFormattingElements
  addFormattingElement(name, attributes)
end
|
||||
|
||||
# Formatting start tags (b, i, em, ...): reconstruct the active formatting
# elements, then add the new element to them.
def startTagFormatting(name, attributes)
  @tree.reconstructActiveFormattingElements
  addFormattingElement(name, attributes)
end
|
||||
|
||||
# <nobr>: like other formatting elements, except that a <nobr> already in
# scope is closed first.
def startTagNobr(name, attributes)
  @tree.reconstructActiveFormattingElements
  if in_scope?('nobr')
    processEndTag('nobr')
  end
  addFormattingElement(name, attributes)
end
|
||||
|
||||
# <button>: a button already in scope is closed (parse error) and the tag is
# reprocessed by the current phase; otherwise the element is inserted and a
# Marker is pushed onto the active formatting elements.
def startTagButton(name, attributes)
  unless in_scope?('button')
    @tree.reconstructActiveFormattingElements
    @tree.insertElement(name, attributes)
    return @tree.activeFormattingElements.push(Marker)
  end

  @parser.parseError(_('Unexpected start tag (button) implied end tag (button).'))
  processEndTag('button')
  @parser.phase.processStartTag(name, attributes)
end
|
||||
|
||||
# <marquee>, <object>: insert the element and push a Marker onto the active
# formatting elements.
def startTagMarqueeObject(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insertElement(name, attributes)
  formatting = @tree.activeFormattingElements
  formatting.push(Marker)
end
|
||||
|
||||
# <xmp>: insert the element and switch the tokenizer to the CDATA content model.
def startTagXmp(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insertElement(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.contentModelFlag = :CDATA
end
|
||||
|
||||
# <table>: close an open <p> in scope, insert the table and switch to the
# inTable phase.
def startTagTable(name, attributes)
  if in_scope?('p')
    processEndTag('p')
  end
  @tree.insertElement(name, attributes)
  @parser.phase = @parser.phases[:inTable]
end
|
||||
|
||||
# Void elements (area, br, img, ...): insert the element and pop it from the
# open elements again straight away.
def startTagVoidFormatting(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insertElement(name, attributes)
  open_elements = @tree.openElements
  open_elements.pop
end
|
||||
|
||||
# <hr>: close an open <p> in scope, insert the element and pop it again
# straight away.
def startTagHr(name, attributes)
  if in_scope?('p')
    endTagP('p')
  end
  @tree.insertElement(name, attributes)
  open_elements = @tree.openElements
  open_elements.pop
end
|
||||
|
||||
# <image> is reported as a parse error and then handled as if it were <img>.
def startTagImage(name, attributes)
  # No really...
  message = _('Unexpected start tag (image). Treated as img.')
  @parser.parseError(message)
  processStartTag('img', attributes)
end
|
||||
|
||||
# <input>: insert the element and pop it from the open elements again
# straight away.
def startTagInput(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insertElement(name, attributes)
  if @tree.formPointer
    # XXX Not exactly sure what to do here
    # @tree.openElements[-1].form = @tree.formPointer
  end
  open_elements = @tree.openElements
  open_elements.pop
end
|
||||
|
||||
# <isindex>: always a parse error. Unless a form pointer is already set, the
# tag is expanded into the equivalent of
#   <form><hr><p><label>prompt text<input name="isindex" ...></label></p><hr></form>
# where the <input> receives the attributes of the original tag plus
# name="isindex".
#
# +attributes+ may be a Hash or an Array of [name, value] pairs (it is an
# empty Array for a tag without attributes) and is not modified.
def startTagIsindex(name, attributes)
  @parser.parseError(_("Unexpected start tag isindex. Don't use it!"))
  return if @tree.formPointer

  processStartTag('form', {})
  processStartTag('hr', {})
  processStartTag('p', {})
  processStartTag('label', {})
  # XXX Localization ...
  processCharacters('This is a searchable index. Insert your search keywords here:')

  # Build a fresh Hash: indexing an Array with a String key would raise
  # TypeError, and the caller's attributes should stay untouched.
  input_attributes = {}
  attributes.each { |attr, value| input_attributes[attr] = value }
  input_attributes['name'] = 'isindex'
  processStartTag('input', input_attributes)

  processEndTag('label')
  processEndTag('p')
  processStartTag('hr', {})
  processEndTag('form')
end
|
||||
|
||||
# <textarea>: insert the element, switch the tokenizer to the RCDATA content
# model and arm the newline-dropping whitespace handling.
def startTagTextarea(name, attributes)
  # XXX Form element pointer checking here as well...
  @tree.insertElement(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.contentModelFlag = :RCDATA
  @processSpaceCharactersDropNewline = true
end
|
||||
|
||||
# iframe, noembed noframes, noscript(if scripting enabled)
|
||||
# Insert the element and switch the tokenizer to the CDATA content model.
# Registered for iframe, noembed, noframes and noscript.
def startTagCdata(name, attributes)
  @tree.insertElement(name, attributes)
  tokenizer = @parser.tokenizer
  tokenizer.contentModelFlag = :CDATA
end
|
||||
|
||||
# <select>: insert the element and switch to the inSelect phase.
def startTagSelect(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insertElement(name, attributes)
  in_select = @parser.phases[:inSelect]
  @parser.phase = in_select
end
|
||||
|
||||
# Start tags of elements that belong inside other elements with a different
# insertion mode ("caption", "col", "colgroup", "frame", "frameset", "head",
# "option", "optgroup", "tbody", "td", "tfoot", "th", "thead", "tr",
# "noscript"): reported as a parse error and otherwise ignored.
def startTagMisplaced(name, attributes)
  message = _("Unexpected start tag (#{name}). Ignored.")
  @parser.parseError(message)
end
|
||||
|
||||
# New HTML5 elements ("event-source", "section", "nav", "article", "aside",
# "header", "footer", "datagrid", "command"): their behaviour is not defined
# yet, so a warning is written to standard error and the tag is handled like
# any other start tag.
def startTagNew(name, attributes)
  # Ruby's standard error stream is $stderr; `sys.stderr` was a leftover from
  # the Python original and raised NameError here.
  $stderr.write("Warning: Undefined behaviour for start tag #{name}")
  startTagOther(name, attributes)
  #raise NotImplementedError
end
|
||||
|
||||
# Any start tag without a dedicated handler: reconstruct the active formatting
# elements, then insert the element.
def startTagOther(name, attributes)
  @tree.reconstructActiveFormattingElements
  @tree.insertElement(name, attributes)
end
|
||||
|
||||
# </p>: with a <p> in scope, implied end tags are generated and elements are
# popped until no <p> is in scope any more. Without one, an empty <p> is
# opened first and then closed by running this method again. A parse error is
# reported whenever the current node is not a <p>.
def endTagP(name)
  @tree.generateImpliedEndTags('p') if in_scope?('p')

  if @tree.openElements[-1].name != 'p'
    @parser.parseError(_('Unexpected end tag (p).'))
  end

  unless in_scope?('p')
    startTagCloseP('p', {})
    return endTagP('p')
  end

  @tree.openElements.pop while in_scope?('p')
end
|
||||
|
||||
# </body>: switches to the afterBody phase. When the second open element is
# not the body (fragment parsing, where possibly only the root element is
# open), the tag is reported as a parse error and ignored. A parse error is
# also reported when the body is closed while another element is still the
# current node.
def endTagBody(name)
  # XXX Need to take open <p> tags into account here. We shouldn't imply
  # </p> but we should not throw a parse error either. Specification is
  # likely to be updated.
  open_elements = @tree.openElements

  # The length check mirrors startTagBody: with a single open element there
  # is no openElements[1] to ask for its name.
  if open_elements.length == 1 || open_elements[1].name != 'body'
    # innerHTML case
    @parser.parseError
    return
  end

  unless open_elements[-1].name == 'body'
    @parser.parseError(_("Unexpected end tag (body). Missing end tag (#{open_elements[-1].name})."))
  end
  @parser.phase = @parser.phases[:afterBody]
end
|
||||
|
||||
# </html>: handled as </body>; unless a fragment is being parsed, the tag is
# then reprocessed by whatever phase is current afterwards.
def endTagHtml(name)
  endTagBody(name)
  return if @parser.innerHTML

  @parser.phase.processEndTag(name)
end
|
||||
|
||||
# End tags of block elements (address, blockquote, div, pre, ul, ...).
def endTagBlock(name)
  # Leaving <pre> ends the special newline-dropping whitespace mode.
  if name == 'pre'
    @processSpaceCharactersDropNewline = false
  end

  @tree.generateImpliedEndTags if in_scope?(name)

  # The element being closed should be the current node.
  if @tree.openElements[-1].name != name
    @parser.parseError(_("End tag (#{name}) seen too early. Expected other end tag."))
  end

  remove_open_elements_until(name) if in_scope?(name)
end
|
||||
|
||||
# </form>: pops the form when it is the current node (after generating implied
# end tags if it is in scope), otherwise reports a parse error. The form
# pointer is cleared in either case.
def endTagForm(name)
  @tree.generateImpliedEndTags if in_scope?(name)

  if @tree.openElements[-1].name == name
    @tree.openElements.pop
  else
    @parser.parseError(_("End tag (form) seen too early. Ignored."))
  end

  @tree.formPointer = nil
end
|
||||
|
||||
# </dd>, </dt>, </li>: when the element is in scope, implied end tags (other
# than its own) are generated, a parse error is reported if it is then not the
# current node, and open elements are removed up to it.
def endTagListItem(name)
  # AT Could merge this with the Block case
  if in_scope?(name)
    @tree.generateImpliedEndTags(name)

    if @tree.openElements[-1].name != name
      @parser.parseError(_("End tag (#{name}) seen too early. Expected other end tag."))
    end
  end

  if in_scope?(name)
    remove_open_elements_until(name)
  end
end
|
||||
|
||||
# Heading end tags: any heading element in scope is closed, not necessarily the
# one named by the tag; a parse error is reported when the current node does
# not match the tag.
def endTagHeading(name)
  # Generate implied end tags once if any heading element is in scope.
  HEADING_ELEMENTS.each do |heading|
    next unless in_scope?(heading)
    @tree.generateImpliedEndTags
    break
  end

  if @tree.openElements[-1].name != name
    @parser.parseError(_("Unexpected end tag (#{name}). Expected other end tag."))
  end

  # Remove open elements until a heading element has been removed.
  HEADING_ELEMENTS.each do |heading|
    next unless in_scope?(heading)
    remove_open_elements_until { |node| HEADING_ELEMENTS.include?(node.name) }
    break
  end
end
|
||||
|
||||
# The much-feared adoption agency algorithm
|
||||
# Handles an end tag routed to this handler by running the "adoption agency
# algorithm":
# http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
#
# name - the tag name of the end tag being processed.
#
# The algorithm repeats until it returns from one of three places:
# * no usable entry for +name+ exists in the active formatting elements
#   (parse error, step 1 paragraph 1),
# * the entry is no longer on the stack of open elements (parse error, the
#   entry is dropped, step 1 paragraph 2),
# * no "furthest block" follows the entry on the stack, in which case the
#   stack is popped up to and including the entry (step 3).
#
# Otherwise the nodes between the formatting element and the furthest block
# are re-parented and a clone of the formatting element is inserted inside
# the furthest block before the next pass.
def endTagFormatting(name)
  # XXX Better parseError messages appreciated.
  loop do
    # Step 1 paragraph 1
    formatting_element = @tree.elementInActiveFormattingElements(name)
    on_stack = formatting_element && @tree.openElements.include?(formatting_element)
    if !formatting_element || (on_stack && !in_scope?(formatting_element.name))
      @parser.parseError(_("End tag (#{name}) violates step 1, paragraph 1 of the adoption agency algorithm."))
      return
    # Step 1 paragraph 2
    elsif !on_stack
      @parser.parseError(_("End tag (#{name}) violates step 1, paragraph 2 of the adoption agency algorithm."))
      @tree.activeFormattingElements.delete(formatting_element)
      return
    end

    # Step 1 paragraph 3
    if formatting_element != @tree.openElements[-1]
      @parser.parseError(_("End tag (#{name}) violates step 1, paragraph 3 of the adoption agency algorithm."))
    end

    # Step 2
    # Start of the adoption agency algorithm proper: the furthest block is
    # the first special/scoping element at or after the formatting element.
    formatting_index = @tree.openElements.index(formatting_element)
    block_names = SPECIAL_ELEMENTS + SCOPING_ELEMENTS
    furthest_block = @tree.openElements[formatting_index..-1].find { |candidate| block_names.include?(candidate.name) }

    # Step 3
    if furthest_block.nil?
      removed = remove_open_elements_until { |candidate| candidate == formatting_element }
      @tree.activeFormattingElements.delete(removed)
      return
    end
    common_ancestor = @tree.openElements[formatting_index - 1]

    # Step 5
    furthest_block.parent.removeChild(furthest_block) if furthest_block.parent

    # Step 6
    # The bookmark is supposed to help us identify where to reinsert
    # nodes in step 12. We have to ensure that we reinsert nodes after
    # the node before the active formatting element. Note the bookmark
    # can move in step 7.4
    bookmark = @tree.activeFormattingElements.index(formatting_element)

    # Step 7
    last_node = node = furthest_block
    loop do
      # AT replace this with a function and recursion?
      # Node is element before node in open elements
      node = @tree.openElements[@tree.openElements.index(node) - 1]
      # Drop every open element that is not an active formatting element.
      until @tree.activeFormattingElements.include?(node)
        discarded = node
        node = @tree.openElements[@tree.openElements.index(node) - 1]
        @tree.openElements.delete(discarded)
      end
      # Step 7.3
      break if node == formatting_element
      # Step 7.4
      if last_node == furthest_block
        # XXX should this be index(node) or index(node)+1
        # Anne: I think +1 is ok. Given x = [2,3,4,5]
        # x.index(3) gives 1 and then x[1 +1] gives 4...
        bookmark = @tree.activeFormattingElements.index(node) + 1
      end
      # Step 7.5
      if node.hasContent
        replacement = node.cloneNode
        # Replace node with its clone in both lists
        @tree.activeFormattingElements[@tree.activeFormattingElements.index(node)] = replacement
        @tree.openElements[@tree.openElements.index(node)] = replacement
        node = replacement
      end
      # Step 7.6
      # Remove last_node from its parent, if any
      last_node.parent.removeChild(last_node) if last_node.parent
      node.appendChild(last_node)
      # Step 7.7
      last_node = node
      # End of inner loop
    end

    # Step 8
    last_node.parent.removeChild(last_node) if last_node.parent
    common_ancestor.appendChild(last_node)

    # Step 9
    clone = formatting_element.cloneNode

    # Step 10
    furthest_block.reparentChildren(clone)

    # Step 11
    furthest_block.appendChild(clone)

    # Step 12
    @tree.activeFormattingElements.delete(formatting_element)
    insert_at = [bookmark, @tree.activeFormattingElements.length].min
    @tree.activeFormattingElements.insert(insert_at, clone)

    # Step 13
    @tree.openElements.delete(formatting_element)
    @tree.openElements.insert(@tree.openElements.index(furthest_block) + 1, clone)
  end
end
|
||||
|
||||
# End-tag handler (per its name: button, marquee, object).
#
# When an element called +name+ is in scope, implied end tags are generated
# first and, after the mismatch check, the stack of open elements is popped
# through that element and the active formatting elements are cleared.
# A parse error is reported whenever the current node is not +name+.
def endTagButtonMarqueeObject(name)
  if in_scope?(name)
    @tree.generateImpliedEndTags
  end

  current_name = @tree.openElements[-1].name
  if current_name != name
    @parser.parseError(_("Unexpected end tag (#{name}). Expected other end tag first."))
  end

  # Scope is re-checked after the implied end tags have been generated.
  if in_scope?(name)
    remove_open_elements_until(name)

    @tree.clearActiveFormattingElements
  end
end
|
||||
|
||||
# Reports a parse error for an end tag that is handled in other insertion
# modes; the tag itself is otherwise ignored here.
def endTagMisplaced(name)
  message = _("Unexpected end tag (#{name}). Ignored.")
  @parser.parseError(message)
end
|
||||
|
||||
# Treats a stray </br> as if it were a <br> start tag: reports a parse
# error, reconstructs the active formatting elements, inserts an element
# called +name+ with no attributes and immediately pops it off the stack of
# open elements again.
def endTagBr(name)
  @parser.parseError(_("Unexpected end tag (br). Treated as br element."))
  @tree.reconstructActiveFormattingElements
  no_attributes = {}
  @tree.insertElement(name, no_attributes)
  @tree.openElements.pop
end
|
||||
|
||||
# Handler for end tags of elements that have no end tag at all: the only
# effect is a parse error naming the tag.
def endTagNone(name)
  message = _("This tag (#{name}) has no end tag")
  @parser.parseError(message)
end
|
||||
|
||||
# Closes the current node when it is the element named by the end tag;
# otherwise reports a parse error and leaves the stack untouched.
def endTagCdataTextAreaXmp(name)
  current_name = @tree.openElements[-1].name
  if current_name != name
    @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
  else
    @tree.openElements.pop
  end
end
|
||||
|
||||
# Handler for end tags of the new HTML5 elements ("event-source", "section",
# "nav", "article", "aside", "header", "footer", "datagrid", "command").
#
# Their behaviour is not defined yet, so a warning is written to STDERR and
# the tag is then processed by the generic endTagOther handler.
def endTagNew(name)
  warning = "Warning: Undefined behaviour for end tag #{name}"
  STDERR.puts warning
  endTagOther(name)
  #raise NotImplementedError
end
|
||||
|
||||
# Generic end-tag handler.
#
# Walks a reversed copy of the stack of open elements (innermost first).
# On the first element called +name+: generates implied end tags, reports a
# parse error if the current node is then not +name+, and pops the stack
# through that element. If a special or scoping element is met first, the end
# tag is reported as a parse error and ignored.
def endTagOther(name)
  # XXX This logic should be moved into the treebuilder
  @tree.openElements.reverse.each do |node|
    if node.name != name
      # A special/scoping element blocks the search.
      if (SPECIAL_ELEMENTS + SCOPING_ELEMENTS).include?(node.name)
        @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
        break
      end
    else
      @tree.generateImpliedEndTags

      if @tree.openElements[-1].name != name
        @parser.parseError(_("Unexpected end tag (#{name})."))
      end

      remove_open_elements_until { |candidate| candidate == node }

      break
    end
  end
end
|
||||
|
||||
protected
|
||||
|
||||
# Inserts an element via the tree builder and records the resulting current
# node (the last open element) in the list of active formatting elements.
def addFormattingElement(name, attributes)
  @tree.insertElement(name, attributes)
  inserted = @tree.openElements[-1]
  @tree.activeFormattingElements.push(inserted)
end
|
||||
|
||||
end
|
||||
end
|
68
vendor/plugins/HTML5lib/lib/html5/html5parser/in_caption_phase.rb
vendored
Normal file
68
vendor/plugins/HTML5lib/lib/html5/html5parser/in_caption_phase.rb
vendored
Normal file
|
@ -0,0 +1,68 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Parser phase for the "in caption" insertion mode.
  #
  # Character data and every tag without a dedicated handler are delegated to
  # the :inBody phase. Table-structure start tags and </table> first act as an
  # implied </caption>; unless that end tag was ignored, the token is then
  # reprocessed by whichever phase is current afterwards.
  class InCaptionPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-caption

    handle_start 'html', %w[caption col colgroup tbody td tfoot th thead tr] => 'TableElement'

    handle_end 'caption', 'table', %w[body col colgroup html tbody td tfoot th thead tr] => 'Ignore'

    # True when no <caption> is in table scope, i.e. a </caption> would have
    # to be ignored.
    def ignoreEndTagCaption
      !in_scope?('caption', true)
    end

    # Text inside a caption is handled by the :inBody phase.
    def processCharacters(data)
      body_phase = @parser.phases[:inBody]
      body_phase.processCharacters(data)
    end

    # A table-structure start tag inside a caption: parse error, implied
    # </caption>, then reprocess the start tag if the caption was closed.
    def startTagTableElement(name, attributes)
      @parser.parseError
      #XXX Have to duplicate logic here to find out if the tag is ignored
      caption_ignored = ignoreEndTagCaption
      @parser.phase.processEndTag('caption')
      unless caption_ignored
        @parser.phase.processStartTag(name, attributes)
      end
    end

    # Any other start tag is handled by the :inBody phase.
    def startTagOther(name, attributes)
      body_phase = @parser.phases[:inBody]
      body_phase.processStartTag(name, attributes)
    end

    # Closes the caption: generates implied end tags, pops the stack through
    # the caption element, clears the active formatting elements and switches
    # to the :inTable phase. Without a caption in table scope only a parse
    # error is reported.
    def endTagCaption(name)
      if ignoreEndTagCaption
        # innerHTML case
        assert @parser.innerHTML
        @parser.parseError
      else
        # AT this code is quite similar to endTagTable in "InTable"
        @tree.generateImpliedEndTags

        if @tree.openElements[-1].name != 'caption'
          @parser.parseError(_("Unexpected end tag (caption). Missing end tags."))
        end

        remove_open_elements_until('caption')

        @tree.clearActiveFormattingElements
        @parser.phase = @parser.phases[:inTable]
      end
    end

    # </table> inside a caption: parse error, implied </caption>, then
    # reprocess </table> if the caption was closed.
    def endTagTable(name)
      @parser.parseError
      caption_ignored = ignoreEndTagCaption
      @parser.phase.processEndTag('caption')
      unless caption_ignored
        @parser.phase.processEndTag(name)
      end
    end

    # End tags that are reported as parse errors and otherwise ignored.
    def endTagIgnore(name)
      message = _("Unexpected end tag (#{name}). Ignored.")
      @parser.parseError(message)
    end

    # Any other end tag is handled by the :inBody phase.
    def endTagOther(name)
      body_phase = @parser.phases[:inBody]
      body_phase.processEndTag(name)
    end

  end
end
|
78
vendor/plugins/HTML5lib/lib/html5/html5parser/in_cell_phase.rb
vendored
Normal file
78
vendor/plugins/HTML5lib/lib/html5/html5parser/in_cell_phase.rb
vendored
Normal file
|
@ -0,0 +1,78 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Parser phase for the "in cell" insertion mode (inside <td> / <th>).
  #
  # Character data and tags without a dedicated handler are delegated to the
  # :inBody phase. Table-structure start tags and the end tags of enclosing
  # table parts close the open cell first and are then reprocessed by the
  # phase that is current afterwards.
  class InCellPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-cell

    handle_start 'html', %w( caption col colgroup tbody td tfoot th thead tr ) => 'TableOther'

    handle_end %w( td th ) => 'TableCell', %w( body caption col colgroup html ) => 'Ignore'

    handle_end %w( table tbody tfoot thead tr ) => 'Imply'

    # Text inside a cell is handled by the :inBody phase.
    def processCharacters(data)
      @parser.phases[:inBody].processCharacters(data)
    end

    # A table-structure start tag inside a cell: close the cell and
    # reprocess the tag, or report a parse error when no cell is in table
    # scope.
    def startTagTableOther(name, attributes)
      if in_scope?('td', true) or in_scope?('th', true)
        closeCell
        @parser.phase.processStartTag(name, attributes)
      else
        # innerHTML case
        @parser.parseError
      end
    end

    # Any other start tag is handled by the :inBody phase.
    def startTagOther(name, attributes)
      @parser.phases[:inBody].processStartTag(name, attributes)
    end

    # Closes the cell called +name+ when it is in table scope: generates
    # implied end tags, pops the stack through the cell (reporting a parse
    # error if other elements were still open), clears the active formatting
    # elements and switches to the :inRow phase. Otherwise the end tag is a
    # parse error and is ignored.
    def endTagTableCell(name)
      if in_scope?(name, true)
        @tree.generateImpliedEndTags(name)
        if @tree.openElements[-1].name != name
          # Message now goes through _() like every other message in this
          # class.
          @parser.parseError(_("Got table cell end tag (#{name}) while required end tags are missing."))

          remove_open_elements_until(name)
        else
          @tree.openElements.pop
        end
        @tree.clearActiveFormattingElements
        @parser.phase = @parser.phases[:inRow]
      else
        @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
      end
    end

    # End tags that are reported as parse errors and otherwise ignored.
    def endTagIgnore(name)
      @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
    end

    # End tag of an enclosing table part: close the cell and reprocess the
    # end tag, or report a parse error when that element is not in table
    # scope.
    def endTagImply(name)
      if in_scope?(name, true)
        closeCell
        @parser.phase.processEndTag(name)
      else
        # sometimes innerHTML case
        @parser.parseError
      end
    end

    # Any other end tag is handled by the :inBody phase.
    def endTagOther(name)
      @parser.phases[:inBody].processEndTag(name)
    end

    protected

    # Closes whichever cell is in table scope, checking <td> before <th>.
    def closeCell
      if in_scope?('td', true)
        endTagTableCell('td')
      elsif in_scope?('th', true)
        endTagTableCell('th')
      end
    end

  end
end
|
55
vendor/plugins/HTML5lib/lib/html5/html5parser/in_column_group_phase.rb
vendored
Normal file
55
vendor/plugins/HTML5lib/lib/html5/html5parser/in_column_group_phase.rb
vendored
Normal file
|
@ -0,0 +1,55 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Parser phase for the "in column group" insertion mode.
  #
  # Only <col> start tags and </colgroup> / </col> end tags have dedicated
  # handling. Everything else implies </colgroup> and, unless that end tag was
  # ignored, is reprocessed by the phase that is current afterwards.
  class InColumnGroupPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-column

    handle_start 'html', 'col'

    handle_end 'colgroup', 'col'

    # True when the current node is <html>, in which case </colgroup> is
    # ignored.
    def ignoreEndTagColgroup
      current_node = @tree.openElements[-1]
      current_node.name == 'html'
    end

    # Character data implies </colgroup> and is then reprocessed.
    def processCharacters(data)
      colgroup_ignored = ignoreEndTagColgroup
      endTagColgroup("colgroup")
      unless colgroup_ignored
        @parser.phase.processCharacters(data)
      end
    end

    # <col>: inserted and immediately popped off the stack of open elements.
    def startTagCol(name, attributes)
      @tree.insertElement(name, attributes)
      @tree.openElements.pop
    end

    # Any other start tag implies </colgroup> and is then reprocessed.
    def startTagOther(name, attributes)
      colgroup_ignored = ignoreEndTagColgroup
      endTagColgroup('colgroup')
      unless colgroup_ignored
        @parser.phase.processStartTag(name, attributes)
      end
    end

    # </colgroup>: pops the current node and switches to the :inTable phase,
    # or reports a parse error when the end tag has to be ignored.
    def endTagColgroup(name)
      if ignoreEndTagColgroup
        # innerHTML case
        assert @parser.innerHTML
        @parser.parseError
      else
        @tree.openElements.pop
        @parser.phase = @parser.phases[:inTable]
      end
    end

    # </col> is always a parse error.
    def endTagCol(name)
      message = _('Unexpected end tag (col). col has no end tag.')
      @parser.parseError(message)
    end

    # Any other end tag implies </colgroup> and is then reprocessed.
    def endTagOther(name)
      colgroup_ignored = ignoreEndTagColgroup
      endTagColgroup('colgroup')
      unless colgroup_ignored
        @parser.phase.processEndTag(name)
      end
    end

  end
end
|
57
vendor/plugins/HTML5lib/lib/html5/html5parser/in_frameset_phase.rb
vendored
Normal file
57
vendor/plugins/HTML5lib/lib/html5/html5parser/in_frameset_phase.rb
vendored
Normal file
|
@ -0,0 +1,57 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Parser phase for the "in frameset" insertion mode.
  #
  # Handles <frameset>, <frame> and <noframes>; character data and all other
  # tags are reported as parse errors and ignored.
  class InFramesetPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset

    handle_start 'html', 'frameset', 'frame', 'noframes'

    handle_end 'frameset', 'noframes'

    # Character data is a parse error here and is dropped.
    def processCharacters(data)
      message = _('Unexpected characters in the frameset phase. Characters ignored.')
      @parser.parseError(message)
    end

    # Nested <frameset>: inserted and left open.
    def startTagFrameset(name, attributes)
      @tree.insertElement(name, attributes)
    end

    # <frame>: inserted and immediately popped off the stack of open
    # elements.
    def startTagFrame(name, attributes)
      @tree.insertElement(name, attributes)
      @tree.openElements.pop
    end

    # <noframes> is handled by the :inBody phase.
    def startTagNoframes(name, attributes)
      body_phase = @parser.phases[:inBody]
      body_phase.processStartTag(name, attributes)
    end

    # Any other start tag is a parse error and is ignored.
    def startTagOther(name, attributes)
      message = _("Unexpected start tag token (#{name}) in the frameset phase. Ignored")
      @parser.parseError(message)
    end

    # </frameset>: pops the current node unless it is <html> (parse error in
    # that case), then switches to the :afterFrameset phase once the current
    # node is no longer a frameset and the parser is not in innerHTML mode.
    def endTagFrameset(name)
      if @tree.openElements[-1].name == 'html'
        # innerHTML case
        @parser.parseError(_("Unexpected end tag token (frameset) in the frameset phase (innerHTML)."))
      else
        @tree.openElements.pop
      end

      # If we're not in innerHTML mode and the current node is not a
      # "frameset" element (anymore) then switch.
      if !@parser.innerHTML && @tree.openElements[-1].name != 'frameset'
        @parser.phase = @parser.phases[:afterFrameset]
      end
    end

    # </noframes> is handled by the :inBody phase.
    def endTagNoframes(name)
      body_phase = @parser.phases[:inBody]
      body_phase.processEndTag(name)
    end

    # Any other end tag is a parse error and is ignored.
    def endTagOther(name)
      message = _("Unexpected end tag token (#{name}) in the frameset phase. Ignored.")
      @parser.parseError(message)
    end

  end
end
|
126
vendor/plugins/HTML5lib/lib/html5/html5parser/in_head_phase.rb
vendored
Normal file
126
vendor/plugins/HTML5lib/lib/html5/html5parser/in_head_phase.rb
vendored
Normal file
|
@ -0,0 +1,126 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Parser phase for the "in head" insertion mode.
  #
  # <title>, <style>, <script>, <base>, <link> and <meta> are inserted here.
  # Any token that does not belong in the head goes through #anythingElse,
  # which leaves this phase for :afterHead, and is then reprocessed by the
  # phase that is current afterwards.
  class InHeadPhase < Phase

    handle_start 'html', 'head', 'title', 'style', 'script', %w[base link meta]

    handle_end 'head'
    handle_end %w[html body br p] => 'ImplyAfterHead'
    handle_end %w[title style script]

    # End of file: an unclosed title/style/script is a parse error and is
    # popped; EOF is then passed on after leaving the head.
    def processEOF
      current_name = @tree.openElements[-1].name
      if %w[title style script].include?(current_name)
        @parser.parseError(_("Unexpected end of file. Expected end tag (#{current_name})."))
        @tree.openElements.pop
      end
      anythingElse
      @parser.phase.processEOF
    end

    # Text inside title/style/script is inserted as-is; any other text
    # leaves the head and is reprocessed.
    def processCharacters(data)
      current_name = @tree.openElements[-1].name
      if %w[title style script].include?(current_name)
        @tree.insertText(data)
      else
        anythingElse
        @parser.phase.processCharacters(data)
      end
    end

    # A second <head> is a parse error and is ignored.
    def startTagHead(name, attributes)
      message = _('Unexpected start tag head in existing head. Ignored')
      @parser.parseError(message)
    end

    # <title>: appended to the head, pushed on the stack of open elements,
    # and the tokenizer is switched to the :RCDATA content model.
    def startTagTitle(name, attributes)
      title = @tree.createElement(name, attributes)
      appendToHead(title)
      @tree.openElements.push(title)
      @parser.tokenizer.contentModelFlag = :RCDATA
    end

    # <style>: appended to the head when a head pointer exists and this
    # phase is the current one, otherwise to the current node; the tokenizer
    # is switched to the :CDATA content model.
    def startTagStyle(name, attributes)
      style = @tree.createElement(name, attributes)
      if !@tree.headPointer.nil? && @parser.phase == @parser.phases[:inHead]
        appendToHead(style)
      else
        @tree.openElements[-1].appendChild(style)
      end
      @tree.openElements.push(style)
      @parser.tokenizer.contentModelFlag = :CDATA
    end

    # <script>: like <style>, and additionally flagged "parser-inserted".
    def startTagScript(name, attributes)
      #XXX Inner HTML case may be wrong
      script = @tree.createElement(name, attributes)
      script._flags.push("parser-inserted")
      if !@tree.headPointer.nil? && @parser.phase == @parser.phases[:inHead]
        appendToHead(script)
      else
        @tree.openElements[-1].appendChild(script)
      end
      @tree.openElements.push(script)
      @parser.tokenizer.contentModelFlag = :CDATA
    end

    # <base>, <link>, <meta>: created and appended, but never pushed on the
    # stack of open elements.
    def startTagBaseLinkMeta(name, attributes)
      node = @tree.createElement(name, attributes)
      if !@tree.headPointer.nil? && @parser.phase == @parser.phases[:inHead]
        appendToHead(node)
      else
        @tree.openElements[-1].appendChild(node)
      end
    end

    # Any other start tag leaves the head and is reprocessed.
    def startTagOther(name, attributes)
      anythingElse
      @parser.phase.processStartTag(name, attributes)
    end

    # </head>: pops the head element when it is the current node (parse
    # error otherwise); the phase becomes :afterHead in both cases.
    def endTagHead(name)
      if @tree.openElements[-1].name == 'head'
        @tree.openElements.pop
      else
        @parser.parseError(_("Unexpected end tag (head). Ignored."))
      end
      @parser.phase = @parser.phases[:afterHead]
    end

    # </html>, </body>, </br>, </p>: leave the head and reprocess.
    def endTagImplyAfterHead(name)
      anythingElse
      @parser.phase.processEndTag(name)
    end

    # </title>, </style>, </script>: pops the current node when it matches,
    # otherwise a parse error.
    def endTagTitleStyleScript(name)
      if @tree.openElements[-1].name == name
        @tree.openElements.pop
      else
        @parser.parseError(_("Unexpected end tag (#{name}). Ignored."))
      end
    end

    # Any other end tag is a parse error and is ignored.
    def endTagOther(name)
      message = _("Unexpected end tag (#{name}). Ignored.")
      @parser.parseError(message)
    end

    # Leaves the head: acts as </head> when <head> is the current node,
    # otherwise just switches to the :afterHead phase.
    def anythingElse
      if @tree.openElements[-1].name == 'head'
        endTagHead('head')
      else
        @parser.phase = @parser.phases[:afterHead]
      end
    end

    protected

    # Appends +element+ to the head element, or to the current node when no
    # head pointer is set (asserted to be the innerHTML case).
    def appendToHead(element)
      if @tree.headPointer.nil?
        assert @parser.innerHTML
        @tree.openElements[-1].appendChild(element)
      else
        @tree.headPointer.appendChild(element)
      end
    end

  end
end
|
87
vendor/plugins/HTML5lib/lib/html5/html5parser/in_row_phase.rb
vendored
Normal file
87
vendor/plugins/HTML5lib/lib/html5/html5parser/in_row_phase.rb
vendored
Normal file
|
@ -0,0 +1,87 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Parser phase for the "in row" insertion mode (inside <tr>).
  #
  # Cell start tags open a cell and switch to :inCell. Other table-structure
  # tags imply </tr> and are reprocessed. Everything without a dedicated
  # handler is delegated to the :inTable phase.
  class InRowPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-row

    handle_start 'html', %w[td th] => 'TableCell', %w[caption col colgroup tbody tfoot thead tr] => 'TableOther'

    handle_end 'tr', 'table', %w[tbody tfoot thead] => 'TableRowGroup', %w[body caption col colgroup html td th] => 'Ignore'

    # Character data is handled by the :inTable phase.
    def processCharacters(data)
      table_phase = @parser.phases[:inTable]
      table_phase.processCharacters(data)
    end

    # <td> / <th>: clears the stack back to the row, inserts the cell,
    # switches to :inCell and pushes a Marker onto the active formatting
    # elements.
    def startTagTableCell(name, attributes)
      clearStackToTableRowContext
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inCell]
      @tree.activeFormattingElements.push(Marker)
    end

    # Other table-structure start tags imply </tr> and are then reprocessed.
    def startTagTableOther(name, attributes)
      row_ignored = ignoreEndTagTr
      endTagTr('tr')
      # XXX how are we sure it's always ignored in the innerHTML case?
      unless row_ignored
        @parser.phase.processStartTag(name, attributes)
      end
    end

    # Any other start tag is handled by the :inTable phase.
    def startTagOther(name, attributes)
      table_phase = @parser.phases[:inTable]
      table_phase.processStartTag(name, attributes)
    end

    # </tr>: clears the stack back to the row, pops it and switches to
    # :inTableBody; a parse error when the end tag has to be ignored.
    def endTagTr(name)
      if ignoreEndTagTr
        # innerHTML case
        assert @parser.innerHTML
        @parser.parseError
      else
        clearStackToTableRowContext
        @tree.openElements.pop
        @parser.phase = @parser.phases[:inTableBody]
      end
    end

    # </table> implies </tr>.
    def endTagTable(name)
      row_ignored = ignoreEndTagTr
      endTagTr('tr')
      # Reprocess the current tag if the tr end tag was not ignored
      # XXX how are we sure it's always ignored in the innerHTML case?
      unless row_ignored
        @parser.phase.processEndTag(name)
      end
    end

    # </tbody>, </tfoot>, </thead>: when that element is in table scope,
    # </tr> is implied and the end tag reprocessed; otherwise a parse error.
    def endTagTableRowGroup(name)
      if in_scope?(name, true)
        endTagTr('tr')
        @parser.phase.processEndTag(name)
      else
        # innerHTML case
        @parser.parseError
      end
    end

    # End tags that are reported as parse errors and otherwise ignored.
    def endTagIgnore(name)
      message = _("Unexpected end tag (#{name}) in the row phase. Ignored.")
      @parser.parseError(message)
    end

    # Any other end tag is handled by the :inTable phase.
    def endTagOther(name)
      table_phase = @parser.phases[:inTable]
      table_phase.processEndTag(name)
    end

    protected

    # XXX unify this with other table helper methods
    # Pops open elements, reporting each as a parse error, until the current
    # node is <tr> or <html>.
    def clearStackToTableRowContext
      loop do
        current_name = @tree.openElements[-1].name
        break if %w[tr html].include?(current_name)
        @parser.parseError(_("Unexpected implied end tag (#{current_name}) in the row phase."))
        @tree.openElements.pop
      end
    end

    # True when no <tr> is in scope, i.e. a </tr> would have to be ignored.
    # NOTE(review): every other call in these phases passes a bare +true+ as
    # the second argument of in_scope?; here it is a hash -- confirm against
    # in_scope? that both forms mean "table variant".
    def ignoreEndTagTr
      !in_scope?('tr', :tableVariant => true)
    end

  end
end
|
84
vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb
vendored
Normal file
84
vendor/plugins/HTML5lib/lib/html5/html5parser/in_select_phase.rb
vendored
Normal file
|
@ -0,0 +1,84 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Parser phase for the "in select" insertion mode.
  #
  # Only <option>, <optgroup> and <select> tags, plus a set of table end
  # tags, have real handling. Character data is inserted as text and every
  # other tag is reported as a parse error and ignored.
  class InSelectPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-select

    handle_start 'html', 'option', 'optgroup', 'select'

    handle_end 'option', 'optgroup', 'select', %w( caption table tbody tfoot thead tr td th ) => 'TableElements'

    # Character data is inserted at the current node.
    def processCharacters(data)
      @tree.insertText(data)
    end

    # <option>: closes an open <option> first, then inserts the new one.
    def startTagOption(name, attributes)
      # We need to imply </option> if <option> is the current node.
      @tree.openElements.pop if @tree.openElements[-1].name == 'option'
      @tree.insertElement(name, attributes)
    end

    # <optgroup>: closes an open <option> and then an open <optgroup>
    # before inserting the new group.
    def startTagOptgroup(name, attributes)
      @tree.openElements.pop if @tree.openElements[-1].name == 'option'
      @tree.openElements.pop if @tree.openElements[-1].name == 'optgroup'
      @tree.insertElement(name, attributes)
    end

    # A nested <select> is a parse error and is treated as </select>.
    def startTagSelect(name, attributes)
      @parser.parseError(_('Unexpected start tag (select) in the select phase implies select start tag.'))
      endTagSelect('select')
    end

    # Any other start tag is a parse error and is ignored.
    def startTagOther(name, attributes)
      # Double quotes are required here: the original single-quoted literal
      # emitted the text "#{name}" verbatim instead of the tag name.
      @parser.parseError(_("Unexpected start tag token (#{name}) in the select phase. Ignored."))
    end

    # </option>: pops the current node when it is an <option>, otherwise a
    # parse error.
    def endTagOption(name)
      if @tree.openElements[-1].name == 'option'
        @tree.openElements.pop
      else
        @parser.parseError(_('Unexpected end tag (option) in the select phase. Ignored.'))
      end
    end

    # </optgroup>: closes an <option> that sits directly inside an
    # <optgroup>, then the <optgroup> itself; otherwise a parse error.
    def endTagOptgroup(name)
      # </optgroup> implicitly closes <option>
      if @tree.openElements[-1].name == 'option' and @tree.openElements[-2].name == 'optgroup'
        @tree.openElements.pop
      end
      # It also closes </optgroup>
      if @tree.openElements[-1].name == 'optgroup'
        @tree.openElements.pop
      # But nothing else
      else
        @parser.parseError(_('Unexpected end tag (optgroup) in the select phase. Ignored.'))
      end
    end

    # </select>: pops the stack through the select element and resets the
    # insertion mode; a parse error when no select is in table scope.
    def endTagSelect(name)
      if in_scope?('select', true)
        remove_open_elements_until('select')

        @parser.resetInsertionMode
      else
        # innerHTML case
        @parser.parseError
      end
    end

    # Table end tags are always a parse error; when the named element is in
    # table scope the select is closed and the end tag reprocessed.
    def endTagTableElements(name)
      @parser.parseError(_("Unexpected table end tag (#{name}) in the select phase."))

      if in_scope?(name, true)
        endTagSelect('select')
        @parser.phase.processEndTag(name)
      end
    end

    # Any other end tag is a parse error and is ignored.
    def endTagOther(name)
      @parser.parseError(_("Unexpected end tag token (#{name}) in the select phase. Ignored."))
    end

  end
end
|
83
vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_body_phase.rb
vendored
Normal file
83
vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_body_phase.rb
vendored
Normal file
|
@ -0,0 +1,83 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Parser phase for the "in table body" insertion mode (inside <tbody>,
  # <thead> or <tfoot>).
  #
  # <tr> opens a row and switches to :inRow; cell start tags imply a <tr>.
  # Other table-structure tags close the current row group and are
  # reprocessed. Everything without a dedicated handler is delegated to the
  # :inTable phase.
  class InTableBodyPhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-table0

    handle_start 'html', 'tr', %w( td th ) => 'TableCell', %w( caption col colgroup tbody tfoot thead ) => 'TableOther'

    # The last mapping used to read 'Ingore', which names no handler defined
    # in this class; the handler below is endTagIgnore.
    handle_end 'table', %w( tbody tfoot thead ) => 'TableRowGroup', %w( body caption col colgroup html td th tr ) => 'Ignore'

    # Character data is handled by the :inTable phase.
    def processCharacters(data)
      @parser.phases[:inTable].processCharacters(data)
    end

    # <tr>: clears the stack back to the row group, inserts the row and
    # switches to the :inRow phase.
    def startTagTr(name, attributes)
      clearStackToTableBodyContext
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inRow]
    end

    # <td> / <th> directly in a row group: parse error, an attribute-less
    # <tr> is implied and the cell tag is reprocessed.
    def startTagTableCell(name, attributes)
      @parser.parseError(_("Unexpected table cell start tag (#{name}) in the table body phase."))
      startTagTr('tr', {})
      @parser.phase.processStartTag(name, attributes)
    end

    # Other table-structure start tags close the current row group and are
    # reprocessed; a parse error when no row group is in table scope.
    def startTagTableOther(name, attributes)
      # XXX AT Any ideas on how to share this with endTagTable?
      if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true)
        clearStackToTableBodyContext
        endTagTableRowGroup(@tree.openElements[-1].name)
        @parser.phase.processStartTag(name, attributes)
      else
        # innerHTML case
        @parser.parseError
      end
    end

    # Any other start tag is handled by the :inTable phase.
    def startTagOther(name, attributes)
      @parser.phases[:inTable].processStartTag(name, attributes)
    end

    # </tbody>, </tfoot>, </thead>: when the element is in table scope the
    # stack is cleared back to the row group, the group is popped and the
    # phase becomes :inTable; otherwise a parse error.
    def endTagTableRowGroup(name)
      if in_scope?(name, true)
        clearStackToTableBodyContext
        @tree.openElements.pop
        @parser.phase = @parser.phases[:inTable]
      else
        @parser.parseError(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
      end
    end

    # </table>: closes the current row group and reprocesses the end tag;
    # a parse error when no row group is in table scope.
    def endTagTable(name)
      if in_scope?('tbody', true) or in_scope?('thead', true) or in_scope?('tfoot', true)
        clearStackToTableBodyContext
        endTagTableRowGroup(@tree.openElements[-1].name)
        @parser.phase.processEndTag(name)
      else
        # innerHTML case
        @parser.parseError
      end
    end

    # End tags that are reported as parse errors and otherwise ignored.
    def endTagIgnore(name)
      @parser.parseError(_("Unexpected end tag (#{name}) in the table body phase. Ignored."))
    end

    # Any other end tag is handled by the :inTable phase.
    def endTagOther(name)
      @parser.phases[:inTable].processEndTag(name)
    end

    protected

    # Pops open elements, reporting each as a parse error, until the current
    # node is <tbody>, <tfoot>, <thead> or <html>.
    def clearStackToTableBodyContext
      until ['tbody', 'tfoot', 'thead', 'html'].include?(name = @tree.openElements[-1].name)
        @parser.parseError(_("Unexpected implied end tag (#{name}) in the table body phase."))
        @tree.openElements.pop
      end
    end

  end
end
|
110
vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_phase.rb
vendored
Normal file
110
vendor/plugins/HTML5lib/lib/html5/html5parser/in_table_phase.rb
vendored
Normal file
|
@ -0,0 +1,110 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
  # Parser phase for the "in table" insertion mode.
  #
  # Table-structure start tags open the matching element and switch to the
  # phase that handles it. Character data and unexpected tags are parse
  # errors that are processed by the :inBody phase while the tree builder's
  # insertFromTable flag is set.
  class InTablePhase < Phase

    # http://www.whatwg.org/specs/web-apps/current-work/#in-table

    handle_start 'html', 'caption', 'colgroup', 'col', 'table'

    handle_start %w[tbody tfoot thead] => 'RowGroup', %w[td th tr] => 'ImplyTbody'

    handle_end 'table', %w[body caption col colgroup html tbody td tfoot th thead tr] => 'Ignore'

    # Character data in a table: parse error, then processed by :inBody with
    # insertFromTable enabled for the duration of the call.
    def processCharacters(data)
      @parser.parseError(_("Unexpected non-space characters in table context caused voodoo mode."))
      # Make all the special element rearranging voodoo kick in
      @tree.insertFromTable = true
      # Process the character in the "in body" mode
      body_phase = @parser.phases[:inBody]
      body_phase.processCharacters(data)
      @tree.insertFromTable = false
    end

    # <caption>: clears the stack back to the table, pushes a Marker onto
    # the active formatting elements, inserts the caption and switches to
    # :inCaption.
    def startTagCaption(name, attributes)
      clearStackToTableContext
      @tree.activeFormattingElements.push(Marker)
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inCaption]
    end

    # <colgroup>: clears the stack back to the table, inserts the element
    # and switches to :inColumnGroup.
    def startTagColgroup(name, attributes)
      clearStackToTableContext
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inColumnGroup]
    end

    # <col> implies an attribute-less <colgroup> and is then reprocessed.
    def startTagCol(name, attributes)
      no_attributes = {}
      startTagColgroup('colgroup', no_attributes)
      @parser.phase.processStartTag(name, attributes)
    end

    # <tbody>, <tfoot>, <thead>: clears the stack back to the table, inserts
    # the row group and switches to :inTableBody.
    def startTagRowGroup(name, attributes)
      clearStackToTableContext
      @tree.insertElement(name, attributes)
      @parser.phase = @parser.phases[:inTableBody]
    end

    # <td>, <th>, <tr> imply an attribute-less <tbody> and are then
    # reprocessed.
    def startTagImplyTbody(name, attributes)
      no_attributes = {}
      startTagRowGroup('tbody', no_attributes)
      @parser.phase.processStartTag(name, attributes)
    end

    # A nested <table>: parse error, implied </table>, then the start tag is
    # reprocessed unless the parser is in innerHTML mode.
    def startTagTable(name, attributes)
      @parser.parseError(_("Unexpected start tag (table) in table phase. Implies end tag (table)."))
      @parser.phase.processEndTag('table')
      unless @parser.innerHTML
        @parser.phase.processStartTag(name, attributes)
      end
    end

    # Any other start tag: parse error, then processed by :inBody with
    # insertFromTable enabled for the duration of the call.
    def startTagOther(name, attributes)
      @parser.parseError(_("Unexpected start tag (#{name}) in table context caused voodoo mode."))
      # Make all the special element rearranging voodoo kick in
      @tree.insertFromTable = true
      # Process the start tag in the "in body" mode
      body_phase = @parser.phases[:inBody]
      body_phase.processStartTag(name, attributes)
      @tree.insertFromTable = false
    end

    # </table>: when a table is in table scope, implied end tags are
    # generated, the stack is popped through the table and the insertion
    # mode is reset; otherwise a parse error.
    def endTagTable(name)
      if in_scope?('table', true)
        @tree.generateImpliedEndTags

        if @tree.openElements[-1].name != 'table'
          @parser.parseError(_("Unexpected end tag (table). Expected end tag (#{@tree.openElements[-1].name})."))
        end

        remove_open_elements_until('table')

        @parser.resetInsertionMode
      else
        # innerHTML case
        assert @parser.innerHTML
        @parser.parseError
      end
    end

    # End tags that are reported as parse errors and otherwise ignored.
    def endTagIgnore(name)
      message = _("Unexpected end tag (#{name}). Ignored.")
      @parser.parseError(message)
    end

    # Any other end tag: parse error, then processed by :inBody with
    # insertFromTable enabled for the duration of the call.
    def endTagOther(name)
      @parser.parseError(_("Unexpected end tag (#{name}) in table context caused voodoo mode."))
      # Make all the special element rearranging voodoo kick in
      @tree.insertFromTable = true
      # Process the end tag in the "in body" mode
      body_phase = @parser.phases[:inBody]
      body_phase.processEndTag(name)
      @tree.insertFromTable = false
    end

    protected

    # "clear the stack back to a table context": pops open elements,
    # reporting each as a parse error, until the current node is <table> or
    # <html>.
    def clearStackToTableContext
      loop do
        current_name = @tree.openElements[-1].name
        break if %w[table html].include?(current_name)
        @parser.parseError(_("Unexpected implied end tag (#{current_name}) in the table phase."))
        @tree.openElements.pop
      end
      # When the current node is <html> it's an innerHTML case
    end

  end
end
|
135
vendor/plugins/HTML5lib/lib/html5/html5parser/initial_phase.rb
vendored
Normal file
135
vendor/plugins/HTML5lib/lib/html5/html5parser/initial_phase.rb
vendored
Normal file
|
@ -0,0 +1,135 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
|
||||
class InitialPhase < Phase
|
||||
|
||||
# This phase deals with error handling as well which is currently not
|
||||
# covered in the specification. The error handling is typically known as
|
||||
# "quirks mode". It is expected that a future version of HTML5 will define this.
|
||||
|
||||
# End of file before any DOCTYPE: reports a parse error, switches the parser
# to the :rootElement phase and lets that phase process the EOF.
def processEOF
  message = _('Unexpected End of file. Expected DOCTYPE.')
  @parser.parseError(message)
  next_phase = @parser.phases[:rootElement]
  @parser.phase = next_phase
  @parser.phase.processEOF
end
|
||||
|
||||
# Inserts the comment with the tree's document as its parent.
def processComment(data)
  document = @tree.document
  @tree.insertComment(data, document)
end
|
||||
|
||||
def processDoctype(name, publicId, systemId, correct)
|
||||
if name.downcase != 'html' or publicId or systemId
|
||||
@parser.parseError(_('Erroneous DOCTYPE.'))
|
||||
end
|
||||
# XXX need to update DOCTYPE tokens
|
||||
@tree.insertDoctype(name)
|
||||
|
||||
publicId = publicId.to_s.upcase
|
||||
|
||||
if name.downcase != 'html'
|
||||
# XXX quirks mode
|
||||
else
|
||||
if ["+//silmaril//dtd html pro v0r11 19970101//en",
|
||||
"-//advasoft ltd//dtd html 3.0 aswedit + extensions//en",
|
||||
"-//as//dtd html 3.0 aswedit + extensions//en",
|
||||
"-//ietf//dtd html 2.0 level 1//en",
|
||||
"-//ietf//dtd html 2.0 level 2//en",
|
||||
"-//ietf//dtd html 2.0 strict level 1//en",
|
||||
"-//ietf//dtd html 2.0 strict level 2//en",
|
||||
"-//ietf//dtd html 2.0 strict//en",
|
||||
"-//ietf//dtd html 2.0//en",
|
||||
"-//ietf//dtd html 2.1e//en",
|
||||
"-//ietf//dtd html 3.0//en",
|
||||
"-//ietf//dtd html 3.0//en//",
|
||||
"-//ietf//dtd html 3.2 final//en",
|
||||
"-//ietf//dtd html 3.2//en",
|
||||
"-//ietf//dtd html 3//en",
|
||||
"-//ietf//dtd html level 0//en",
|
||||
"-//ietf//dtd html level 0//en//2.0",
|
||||
"-//ietf//dtd html level 1//en",
|
||||
"-//ietf//dtd html level 1//en//2.0",
|
||||
"-//ietf//dtd html level 2//en",
|
||||
"-//ietf//dtd html level 2//en//2.0",
|
||||
"-//ietf//dtd html level 3//en",
|
||||
"-//ietf//dtd html level 3//en//3.0",
|
||||
"-//ietf//dtd html strict level 0//en",
|
||||
"-//ietf//dtd html strict level 0//en//2.0",
|
||||
"-//ietf//dtd html strict level 1//en",
|
||||
"-//ietf//dtd html strict level 1//en//2.0",
|
||||
"-//ietf//dtd html strict level 2//en",
|
||||
"-//ietf//dtd html strict level 2//en//2.0",
|
||||
"-//ietf//dtd html strict level 3//en",
|
||||
"-//ietf//dtd html strict level 3//en//3.0",
|
||||
"-//ietf//dtd html strict//en",
|
||||
"-//ietf//dtd html strict//en//2.0",
|
||||
"-//ietf//dtd html strict//en//3.0",
|
||||
"-//ietf//dtd html//en",
|
||||
"-//ietf//dtd html//en//2.0",
|
||||
"-//ietf//dtd html//en//3.0",
|
||||
"-//metrius//dtd metrius presentational//en",
|
||||
"-//microsoft//dtd internet explorer 2.0 html strict//en",
|
||||
"-//microsoft//dtd internet explorer 2.0 html//en",
|
||||
"-//microsoft//dtd internet explorer 2.0 tables//en",
|
||||
"-//microsoft//dtd internet explorer 3.0 html strict//en",
|
||||
"-//microsoft//dtd internet explorer 3.0 html//en",
|
||||
"-//microsoft//dtd internet explorer 3.0 tables//en",
|
||||
"-//netscape comm. corp.//dtd html//en",
|
||||
"-//netscape comm. corp.//dtd strict html//en",
|
||||
"-//o'reilly and associates//dtd html 2.0//en",
|
||||
"-//o'reilly and associates//dtd html extended 1.0//en",
|
||||
"-//spyglass//dtd html 2.0 extended//en",
|
||||
"-//sq//dtd html 2.0 hotmetal + extensions//en",
|
||||
"-//sun microsystems corp.//dtd hotjava html//en",
|
||||
"-//sun microsystems corp.//dtd hotjava strict html//en",
|
||||
"-//w3c//dtd html 3 1995-03-24//en",
|
||||
"-//w3c//dtd html 3.2 draft//en",
|
||||
"-//w3c//dtd html 3.2 final//en",
|
||||
"-//w3c//dtd html 3.2//en",
|
||||
"-//w3c//dtd html 3.2s draft//en",
|
||||
"-//w3c//dtd html 4.0 frameset//en",
|
||||
"-//w3c//dtd html 4.0 transitional//en",
|
||||
"-//w3c//dtd html experimental 19960712//en",
|
||||
"-//w3c//dtd html experimental 970421//en",
|
||||
"-//w3c//dtd w3 html//en",
|
||||
"-//w3o//dtd w3 html 3.0//en",
|
||||
"-//w3o//dtd w3 html 3.0//en//",
|
||||
"-//w3o//dtd w3 html strict 3.0//en//",
|
||||
"-//webtechs//dtd mozilla html 2.0//en",
|
||||
"-//webtechs//dtd mozilla html//en",
|
||||
"-/w3c/dtd html 4.0 transitional/en",
|
||||
"html"].include?(publicId) or
|
||||
(systemId == nil and
|
||||
["-//w3c//dtd html 4.01 frameset//EN",
|
||||
"-//w3c//dtd html 4.01 transitional//EN"].include?(publicId)) or
|
||||
(systemId == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
|
||||
#XXX quirks mode
|
||||
end
|
||||
end
|
||||
|
||||
@parser.phase = @parser.phases[:rootElement]
|
||||
end
|
||||
|
||||
def processSpaceCharacters(data)
|
||||
@tree.insertText(data, @tree.document)
|
||||
end
|
||||
|
||||
def processCharacters(data)
|
||||
@parser.parseError(_('Unexpected non-space characters. Expected DOCTYPE.'))
|
||||
@parser.phase = @parser.phases[:rootElement]
|
||||
@parser.phase.processCharacters(data)
|
||||
end
|
||||
|
||||
def processStartTag(name, attributes)
|
||||
@parser.parseError(_("Unexpected start tag (#{name}). Expected DOCTYPE."))
|
||||
@parser.phase = @parser.phases[:rootElement]
|
||||
@parser.phase.processStartTag(name, attributes)
|
||||
end
|
||||
|
||||
def processEndTag(name)
|
||||
@parser.parseError(_("Unexpected end tag (#{name}). Expected DOCTYPE."))
|
||||
@parser.phase = @parser.phases[:rootElement]
|
||||
@parser.phase.processEndTag(name)
|
||||
end
|
||||
|
||||
end
|
||||
end
|
156
vendor/plugins/HTML5lib/lib/html5/html5parser/phase.rb
vendored
Normal file
156
vendor/plugins/HTML5lib/lib/html5/html5parser/phase.rb
vendored
Normal file
|
@ -0,0 +1,156 @@
|
|||
module HTML5
|
||||
# Base class for helper objects that implement each phase of processing.
|
||||
#
|
||||
# Handler methods should be in the following order (they can be omitted):
|
||||
#
|
||||
# * EOF
|
||||
# * Comment
|
||||
# * Doctype
|
||||
# * SpaceCharacters
|
||||
# * Characters
|
||||
# * StartTag
|
||||
# - startTag* methods
|
||||
# * EndTag
|
||||
# - endTag* methods
|
||||
#
|
||||
# Base class for the per-phase token handlers. Subclasses declare which tag
# names they handle with handle_start / handle_end and implement the
# matching startTag* / endTag* methods; tags without a declared handler go
# to startTagOther / endTagOther.
class Phase

  # Builds a tag-name => handler-method-name hash.
  #
  # prefix - 'startTag' or 'endTag'
  # tags   - tag names or arrays of tag names; a trailing Hash maps tag
  #          name(s) to an explicit method-name suffix
  #
  # The following example call:
  #
  #   tag_handlers('startTag', 'html', %w( base link meta ), %w( li dt dd ) => 'ListItem')
  #
  # ...would return a hash equal to this:
  #
  #   { 'html' => 'startTagHtml',
  #     'base' => 'startTagBaseLinkMeta',
  #     'link' => 'startTagBaseLinkMeta',
  #     'meta' => 'startTagBaseLinkMeta',
  #     'li'   => 'startTagListItem',
  #     'dt'   => 'startTagListItem',
  #     'dd'   => 'startTagListItem' }
  #
  def self.tag_handlers(prefix, *tags)
    mapping = {}
    # A trailing hash carries explicit handler suffixes.
    if tags.last.is_a?(Hash)
      tags.pop.each do |names, handler_method_suffix|
        handler_method = prefix + handler_method_suffix
        Array(names).each { |name| mapping[name] = handler_method }
      end
    end
    # Everything else gets a suffix made of the capitalized tag names.
    tags.each do |names|
      names = Array(names)
      handler_method = prefix + names.map { |name| name.capitalize }.join
      names.each { |name| mapping[name] = handler_method }
    end
    return mapping
  end

  # Per-class start tag table; unknown tags map to 'startTagOther'.
  def self.start_tag_handlers
    @start_tag_handlers ||= Hash.new('startTagOther')
  end

  # Declare what start tags this Phase handles. Can be called more than once.
  #
  # Example usage:
  #
  #   handle_start 'html'
  #   # html start tags will be handled by a method named 'startTagHtml'
  #
  #   handle_start %w( base link meta )
  #   # base, link and meta start tags will be handled by a method named 'startTagBaseLinkMeta'
  #
  #   handle_start %w( li dt dd ) => 'ListItem'
  #   # li, dt, and dd start tags will be handled by a method named 'startTagListItem'
  #
  def self.handle_start(*tags)
    start_tag_handlers.update tag_handlers('startTag', *tags)
  end

  # Per-class end tag table; unknown tags map to 'endTagOther'.
  def self.end_tag_handlers
    @end_tag_handlers ||= Hash.new('endTagOther')
  end

  # Declare what end tags this Phase handles. Behaves like handle_start.
  #
  def self.handle_end(*tags)
    end_tag_handlers.update tag_handlers('endTag', *tags)
  end

  # parser - the parser driving this phase
  # tree   - the tree builder the phase writes to
  def initialize(parser, tree)
    @parser, @tree = parser, tree
  end

  # Default EOF handling: generate implied end tags, then report a parse
  # error if elements other than html/body are still open.
  def processEOF
    @tree.generateImpliedEndTags

    if @tree.openElements.length > 2
      @parser.parseError(_('Unexpected end of file. Missing closing tags.'))
    elsif @tree.openElements.length == 2 and @tree.openElements[1].name != 'body'
      # This happens for framesets or something?
      @parser.parseError(_("Unexpected end of file. Expected end tag (#{@tree.openElements[1].name}) first."))
    elsif @parser.innerHTML and @tree.openElements.length > 1
      # XXX This is not what the specification says. Not sure what to do here.
      @parser.parseError(_('XXX innerHTML EOF'))
    end
    # Betting ends.
  end

  def processComment(data)
    # For most phases the following is correct. Where it's not it will be
    # overridden.
    @tree.insertComment(data, @tree.openElements[-1])
  end

  # A DOCTYPE is ignored with a parse error by default.
  def processDoctype(name, publicId, systemId, correct)
    @parser.parseError(_('Unexpected DOCTYPE. Ignored.'))
  end

  def processSpaceCharacters(data)
    @tree.insertText(data)
  end

  # Dispatches to the handler registered for this tag name on the class.
  def processStartTag(name, attributes)
    send self.class.start_tag_handlers[name], name, attributes
  end

  # Copies attributes that the root element does not have yet onto it.
  def startTagHtml(name, attributes)
    if @parser.firstStartTag == false and name == 'html'
      @parser.parseError(_('html needs to be the first start tag.'))
    end
    # XXX Need a check here to see if the first start tag token emitted is
    # this token... If it's not, invoke @parser.parseError.
    attributes.each do |attr, value|
      unless @tree.openElements[0].attributes.has_key?(attr)
        @tree.openElements[0].attributes[attr] = value
      end
    end
    @parser.firstStartTag = false
  end

  # Dispatches to the handler registered for this tag name on the class.
  def processEndTag(name)
    send self.class.end_tag_handlers[name], name
  end

  # Message hook wrapped around every error string; returns it unchanged.
  def _(string)
    string
  end

  # Raises AssertionError unless value is truthy. AssertionError is not
  # defined in this class - presumably it lives elsewhere in the library.
  # `raise` is required here: `throw` is for catch/throw control flow and
  # would surface as an uncaught-throw error, not as the intended exception.
  def assert(value)
    raise AssertionError unless value
  end

  def in_scope?(*args)
    @tree.elementInScope(*args)
  end

  # Pops open elements until one named +name+ has been popped or, when no
  # name is given, until the block returns true for the popped element.
  # Returns the last element popped.
  def remove_open_elements_until(name=nil)
    finished = false
    until finished
      element = @tree.openElements.pop
      finished = name.nil? ? yield(element) : element.name == name
    end
    return element
  end

end
|
||||
end
|
43
vendor/plugins/HTML5lib/lib/html5/html5parser/root_element_phase.rb
vendored
Normal file
43
vendor/plugins/HTML5lib/lib/html5/html5parser/root_element_phase.rb
vendored
Normal file
|
@ -0,0 +1,43 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
|
||||
# Phase that runs between the DOCTYPE and the root element. Comments and
# whitespace go onto the document node; every other token first creates
# the <html> element and is then handed to whichever phase
# insertHtmlElement switched the parser to.
class RootElementPhase < Phase

  def processEOF
    insertHtmlElement
    successor = @parser.phase
    successor.processEOF
  end

  def processComment(data)
    document = @tree.document
    @tree.insertComment(data, document)
  end

  def processSpaceCharacters(data)
    document = @tree.document
    @tree.insertText(data, document)
  end

  def processCharacters(data)
    insertHtmlElement
    successor = @parser.phase
    successor.processCharacters(data)
  end

  def processStartTag(name, attributes)
    # Remember that the document really did start with <html>.
    if name == 'html'
      @parser.firstStartTag = true
    end
    insertHtmlElement
    successor = @parser.phase
    successor.processStartTag(name, attributes)
  end

  def processEndTag(name)
    insertHtmlElement
    successor = @parser.phase
    successor.processEndTag(name)
  end

  # Creates the <html> element, pushes it on the stack of open elements,
  # appends it to the document and switches the parser to :beforeHead.
  def insertHtmlElement
    root = @tree.createElement('html', {})
    @tree.openElements.push(root)
    @tree.document.appendChild(root)
    next_phase = @parser.phases[:beforeHead]
    @parser.phase = next_phase
  end

end
|
||||
end
|
36
vendor/plugins/HTML5lib/lib/html5/html5parser/trailing_end_phase.rb
vendored
Normal file
36
vendor/plugins/HTML5lib/lib/html5/html5parser/trailing_end_phase.rb
vendored
Normal file
|
@ -0,0 +1,36 @@
|
|||
require 'html5/html5parser/phase'
|
||||
|
||||
module HTML5
|
||||
# Phase used once the document is expected to be finished. Comments go
# onto the document node and whitespace is passed to the parser's
# lastPhase; any other content is a parse error, after which the parser
# switches back to lastPhase and reprocesses the token there.
class TrailingEndPhase < Phase

  # Nothing left to do at end of file.
  def processEOF
  end

  def processComment(data)
    @tree.insertComment(data, @tree.document)
  end

  def processSpaceCharacters(data)
    @parser.lastPhase.processSpaceCharacters(data)
  end

  def processCharacters(data)
    @parser.parseError(_('Unexpected non-space characters. Expected end of file.'))
    @parser.phase = @parser.lastPhase
    @parser.phase.processCharacters(data)
  end

  def processStartTag(name, attributes)
    # Double quotes are required so that the tag name is interpolated.
    @parser.parseError(_("Unexpected start tag (#{name}). Expected end of file."))
    @parser.phase = @parser.lastPhase
    @parser.phase.processStartTag(name, attributes)
  end

  def processEndTag(name)
    # Double quotes are required so that the tag name is interpolated.
    @parser.parseError(_("Unexpected end tag (#{name}). Expected end of file."))
    @parser.phase = @parser.lastPhase
    @parser.phase.processEndTag(name)
  end

end
|
||||
end
|
654
vendor/plugins/HTML5lib/lib/html5/inputstream.rb
vendored
Executable file
654
vendor/plugins/HTML5lib/lib/html5/inputstream.rb
vendored
Executable file
|
@ -0,0 +1,654 @@
|
|||
require 'stringio'
|
||||
require 'html5/constants'
|
||||
|
||||
module HTML5
|
||||
|
||||
# Provides a unicode stream of characters to the HTMLTokenizer.
|
||||
|
||||
# This class takes care of character encoding and removing or replacing
|
||||
# incorrect byte-sequences and also provides column and line tracking.
|
||||
|
||||
class HTMLInputStream
|
||||
|
||||
attr_accessor :queue, :char_encoding, :errors
|
||||
|
||||
# Initialises the HTMLInputStream.
|
||||
#
|
||||
# HTMLInputStream(source, [encoding]) -> Normalized stream from source
|
||||
# for use by the HTML5Lib.
|
||||
#
|
||||
# source can be either an IO-like object (anything that responds to #read) or a string of markup.
|
||||
#
|
||||
# The optional encoding parameter must be a string that indicates
|
||||
# the encoding. If specified, that encoding will be used,
|
||||
# regardless of any BOM or later declaration (such as in a meta
|
||||
# element)
|
||||
#
|
||||
# parseMeta - Look for a <meta> element containing encoding information
|
||||
|
||||
# Sets up the stream: applies the options, opens the source, settles on a
# character encoding and primes the read buffer.
#
# source  - passed to open_stream
# options - each key/value pair is stored as an instance variable of the
#           same name; the ones read here are :encoding, :parse_meta and
#           :chardet
def initialize(source, options = {})
  # Defaults, overridable through options.
  @encoding, @parse_meta, @chardet = nil, true, true
  options.each { |key, setting| instance_variable_set("@#{key}", setting) }

  # Raw Stream
  @raw_stream = open_stream(source)

  # Number of bytes to inspect when looking for a meta element with
  # encoding information
  @NUM_BYTES_META = 512
  # Number of bytes fed to chardet at a time when guessing the encoding
  @NUM_BYTES_CHARDET = 256
  # Number of bytes to read at a time when reading content
  @NUM_BYTES_BUFFER = 1024
  # Encoding to use if no other information can be found
  @DEFAULT_ENCODING = 'windows-1252'

  # Detect the encoding unless a valid "transport level" encoding was given.
  @char_encoding =
    if @encoding.nil? or not HTML5.is_valid_encoding(@encoding)
      detect_encoding
    else
      @encoding
    end

  # Prime the buffer. windows-1252 and utf-8 are decoded on the fly by
  # #char; anything else is converted to utf-8 up front with iconv, falling
  # back to windows-1252 handling when iconv is missing or fails.
  @buffer = @raw_stream.read(@NUM_BYTES_BUFFER) || ''
  if @char_encoding == 'windows-1252'
    @win1252 = true
  elsif @char_encoding != 'utf-8'
    begin
      require 'iconv'
      begin
        @buffer << @raw_stream.read unless @raw_stream.eof?
        @buffer = Iconv.iconv('utf-8', @char_encoding, @buffer).first
      rescue
        @win1252 = true
      end
    rescue LoadError
      @win1252 = true
    end
  end

  @queue = []
  @errors = []

  # Reset position in the buffer to read from
  @tell = 0
  @line = 0
  @col = 0
  @line_lengths = []
end
|
||||
|
||||
# Produces a file object from source.
|
||||
#
|
||||
# source can be either an IO-like object (anything that responds to #read) or a string.
|
||||
# Returns an object to read from: the source itself when it already
# responds to #read, otherwise the source wrapped in a StringIO. The
# result is also remembered in @stream.
def open_stream(source)
  @stream = source.respond_to?(:read) ? source : StringIO.new(source)
end
|
||||
|
||||
# Works out the encoding to use: a BOM first, then a <meta> declaration
# (when @parse_meta is set), then a chardet guess (when @chardet is set
# and the gem can be loaded), and finally @DEFAULT_ENCODING.
# 'iso-8859-1' is reported as 'windows-1252'.
def detect_encoding
  # First look for a BOM. This will also read past the BOM if present.
  encoding = detect_bom

  # No BOM: look for meta elements with encoding information.
  encoding = detect_encoding_meta if encoding.nil? and @parse_meta

  # Guess with chardet, if available
  if encoding.nil? and @chardet
    begin
      require 'rubygems'
      require 'UniversalDetector' # gem install chardet
      consumed = []
      detector = UniversalDetector::Detector.instance
      detector.reset
      until @raw_stream.eof?
        chunk = @raw_stream.read(@NUM_BYTES_CHARDET)
        break if !chunk or chunk.empty?
        consumed << chunk
        detector.feed(chunk)
        break if detector.instance_eval { @done }
        detector.instance_eval {
          @_mLastChar = @_mLastChar.chr if Fixnum === @_mLastChar
        }
      end
      detector.close
      encoding = detector.result['encoding']
      # Hand everything chardet consumed back to the stream.
      seek(consumed.join(''), 0)
    rescue LoadError
    end
  end

  # If all else fails use the default encoding
  encoding = @DEFAULT_ENCODING if encoding.nil?

  # Substitute for equivalent encodings
  equivalents = {'iso-8859-1' => 'windows-1252'}
  equivalents.fetch(encoding.downcase, encoding)
end
|
||||
|
||||
# Attempts to detect a BOM at the start of the stream. If
|
||||
# an encoding can be determined from the BOM return the name of the
|
||||
# encoding otherwise return nil
|
||||
# Reads the first four bytes of the raw stream and checks them for a byte
# order mark. Returns the encoding name for a recognised BOM, otherwise
# nil. Afterwards the stream is repositioned just past the BOM, or back at
# the start when none was found.
def detect_bom
  bom_dict = {
    "\xef\xbb\xbf" => 'utf-8',
    "\xff\xfe" => 'utf-16le',
    "\xfe\xff" => 'utf-16be',
    "\xff\xfe\x00\x00" => 'utf-32le',
    "\x00\x00\xfe\xff" => 'utf-32be'
  }

  string = @raw_stream.read(4)
  return nil unless string

  # Candidate prefixes in the order they must be tried: UTF-8 first, then
  # UTF-32 before UTF-16 because the UTF-16LE mark is a prefix of the
  # UTF-32LE one.
  candidates = [[string[0...3], 3], [string, 4], [string[0...2], 2]]
  prefix, bom_length = candidates.find { |bytes, _len| bom_dict[bytes] }
  encoding = prefix && bom_dict[prefix]

  # Set the read position past the BOM if one was found, otherwise
  # set it to the start of the stream
  seek(string, encoding ? bom_length : 0)

  encoding
end
|
||||
|
||||
# Repositions the raw stream so that the next read starts at offset +n+ of
# +buffer+, where +buffer+ is data that was just read from the stream.
#
# Three strategies are tried in order:
# 1. the stream supports #unget: push buffer[n..-1] back;
# 2. the stream supports #seek: seek to absolute offset n
#    (NOTE(review): this assumes buffer was read from offset 0 of the
#    stream - confirm against callers);
# 3. otherwise wrap the stream in a SimpleDelegator that adds a push-back
#    buffer, and push buffer[n..-1] back into it.
def seek(buffer, n)
  if @raw_stream.respond_to?(:unget)
    @raw_stream.unget(buffer[n..-1])
    return
  end

  if @raw_stream.respond_to?(:seek)
    begin
      @raw_stream.seek(n)
      return
    rescue Errno::ESPIPE
      # not seekable after all (e.g. a pipe); fall through to the wrapper
    end
  end

  require 'delegate'
  @raw_stream = SimpleDelegator.new(@raw_stream)

  class << @raw_stream
    # Serves pushed-back data first, then reads from the wrapped stream.
    def read(chars=-1)
      if chars == -1 or chars > @data.length
        # Request exceeds the pushed-back data: hand it all out and top up
        # from the underlying stream.
        result = @data
        @data = ''
        return result if __getobj__.eof?
        return result + __getobj__.read if chars == -1
        return result + __getobj__.read(chars-result.length)
      elsif @data.empty?
        return __getobj__.read(chars)
      else
        # Request fits in the pushed-back data. The slice has to start at
        # index 0, otherwise the first pushed-back byte is lost.
        result = @data[0...chars]
        @data = @data[chars..-1]
        return result
      end
    end

    # Appends data to the push-back buffer.
    def unget(data)
      if !@data or @data.empty?
        @data = data
      else
        @data += data
      end
    end
  end

  @raw_stream.unget(buffer[n .. -1])
end
|
||||
|
||||
# Report the encoding declared by the meta element
|
||||
# Reads the first @NUM_BYTES_META bytes, hands them to an EncodingParser,
# rewinds the stream over what was read and returns the parser's result.
def detect_encoding_meta
  head = @raw_stream.read(@NUM_BYTES_META)
  meta_parser = EncodingParser.new(head)
  seek(head, 0)
  meta_parser.get_encoding
end
|
||||
|
||||
# Returns (line, col) of the current position in the stream.
|
||||
# Returns [line, col] for the current position, with the line counted from
# one. Characters still waiting in @queue have been read but not consumed,
# so they are walked backwards and subtracted again.
def position
  row, column = @line, @col
  @queue.reverse_each do |pending|
    unless pending == "\n"
      column -= 1
      next
    end
    # Stepping back over a newline: continue at the end of the previous line.
    row -= 1
    raise RuntimeError.new("col=#{column}") if column != 0
    column = @line_lengths[row]
  end
  [row + 1, column]
end
|
||||
|
||||
# Read one character from the stream or queue if available. Return
|
||||
# EOF when EOF is reached.
|
||||
def char
  # Characters pushed back with unget / chars_until are served first.
  unless @queue.empty?
    return @queue.shift
  else
    # Refill when fewer than three bytes are left in the buffer, keeping
    # the unread tail.
    if @tell + 3 > @buffer.length and !@raw_stream.eof?
      # read next block
      @buffer = @buffer[@tell .. -1] + @raw_stream.read(@NUM_BYTES_BUFFER)
      @tell = 0
    end

    # NOTE(review): the integer ranges below only match when
    # @buffer[@tell] is an Integer byte value (nil past the end of the
    # buffer) - presumably Ruby 1.8 String#[] semantics; confirm the
    # target Ruby version.
    c = @buffer[@tell]
    @tell += 1

    case c
    when 0x01 .. 0x7F
      if c == 0x0D
        # normalize newlines
        @tell += 1 if @buffer[@tell] == 0x0A
        c = 0x0A
      end

      # update position in stream
      if c == 0x0a
        @line_lengths << @col
        @line += 1
        @col = 0
      else
        @col += 1
      end

      c.chr

    when 0x80 .. 0xBF
      # A byte in this range cannot start a utf-8 sequence; it is only
      # meaningful when the input is treated as windows-1252.
      if !@win1252
        [0xFFFD].pack('U') # invalid utf-8
      elsif c <= 0x9f
        [ENTITIES_WINDOWS1252[c-0x80]].pack('U')
      else
        "\xC2" + c.chr # convert to utf-8
      end

    when 0xC0 .. 0xFF
      if @win1252
        "\xC3" + (c-64).chr # convert to utf-8
      elsif @buffer[@tell-1 .. @tell+3] =~ /^
          ( [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
          | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
          | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
          | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
          | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
          | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
          | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
          )/x
        # The lead byte was already consumed above; skip the rest of the
        # matched sequence and return it as one character.
        @tell += $1.length - 1
        $1
      else
        [0xFFFD].pack('U') # invalid utf-8
      end

    when 0x00
      @errors.push('null character found in input stream, ' +
        'replaced with U+FFFD')
      [0xFFFD].pack('U') # null characters are invalid

    else
      # No integer range matched (e.g. nil past the end of the buffer).
      :EOF
    end
  end
end
|
||||
|
||||
# Returns a string of characters from the stream up to but not
|
||||
# including any character in characters or EOF. characters can be
|
||||
# any container that supports the in method being called on it.
|
||||
# Consumes characters while their membership in +characters+ equals
# +opposite+ (by default: while they are NOT in +characters+) and returns
# them joined into one string. The character that stopped the scan is put
# back at the front of the queue; EOF is never put back.
def chars_until(characters, opposite=false)
  collected = []
  current = char
  until current == :EOF or characters.include?(current) != opposite
    collected << current
    current = char
  end

  # Put the character stopped on back to the front of the queue
  # from where it came.
  @queue.unshift(current) unless current == :EOF
  collected.join('')
end
|
||||
|
||||
# Puts characters back at the front of the queue so that they are read
# again. :EOF is ignored.
def unget(characters)
  return if characters == :EOF
  @queue.unshift(*characters.to_a)
end
|
||||
end
|
||||
|
||||
# String-like object with an associated position and various extra methods
|
||||
# If the position is ever greater than the string length then an exception is raised
|
||||
# String with a read cursor (+position+), used while sniffing the encoding.
# Reading at or past the end of the string raises EOF.
class EncodingBytes < String

  attr_accessor :position

  # The cursor starts at -1, i.e. before the first byte.
  def initialize(value)
    super(value)
    @position = -1
  end

  # Advances the cursor one step at a time and yields the element at the
  # new position. Iteration ends when the cursor reaches the end of the
  # string or when the block raises EOF. The block may move the cursor
  # itself; iteration continues from wherever it left it.
  def each
    while @position < length
      @position += 1
      # Stop instead of yielding once the cursor has reached the end;
      # there is no byte there to hand out.
      break if @position >= length
      yield self[@position]
    end
  rescue EOF
  end

  # Returns the byte under the cursor as a one-character string.
  # Raises EOF when the cursor is at or past the end.
  def current_byte
    raise EOF if @position >= length
    return self[@position].chr
  end

  # Skip past a list of characters
  def skip(chars=SPACE_CHARACTERS)
    while chars.include?(current_byte)
      @position += 1
    end
  end

  # Look for a sequence of bytes at the start of a string. If the bytes
  # are found return true and advance the position to the byte after the
  # match. Otherwise return false and leave the position alone.
  # With lower=true the data is lower-cased before comparing.
  def match_bytes(bytes, lower=false)
    data = self[position ... position+bytes.length]
    data.downcase! if lower
    rv = (data == bytes)
    @position += bytes.length if rv == true
    return rv
  end

  # Look for the next sequence of bytes matching a given sequence. If
  # a match is found advance the position to the last byte of the match
  # and return true; otherwise raise EOF.
  def jump_to(bytes)
    new_position = self[position .. -1].index(bytes)
    if new_position
      @position += (new_position + bytes.length-1)
      return true
    else
      raise EOF
    end
  end

  # Move the pointer so it points to the next byte in a set of possible
  # bytes. Raises EOF (via current_byte) when none is found.
  def find_next(byte_list)
    until byte_list.include?(current_byte)
      @position += 1
    end
  end
end
|
||||
|
||||
# Mini parser for detecting character encoding from meta elements
|
||||
# Mini parser for detecting character encoding from meta elements.
# It scans the data for markup, skips over comments, declarations and
# ordinary tags, and inspects the attributes of <meta> elements.
class EncodingParser

  # data - the data to work on for encoding detection
  def initialize(data)
    @data = EncodingBytes.new(data.to_s)
    @encoding = nil
  end

  # Prefix => handler table. Order matters: longer, more specific prefixes
  # come before '<'.
  @@method_dispatch = [
    ['<!--', :handle_comment],
    ['<meta', :handle_meta],
    ['</', :handle_possible_end_tag],
    ['<!', :handle_other],
    ['<?', :handle_other],
    ['<', :handle_possible_start_tag]
  ]

  # Returns the encoding declared by the first usable <meta>, stripped of
  # surrounding whitespace, or nil when none is found.
  def get_encoding
    @data.each do |byte|
      keep_parsing = true
      @@method_dispatch.each do |(key, method)|
        if @data.match_bytes(key, true)
          keep_parsing = send(method)
          break
        end
      end
      break unless keep_parsing
    end
    @encoding = @encoding.strip unless @encoding.nil?
    return @encoding
  end

  # Skip over comments
  def handle_comment
    return @data.jump_to('-->')
  end

  # Examines the attributes of a <meta> element. Returns false (stop
  # parsing) once a valid encoding has been stored in @encoding, true
  # otherwise.
  def handle_meta
    # if we have <meta not followed by a space so just keep going
    return true unless SPACE_CHARACTERS.include?(@data.current_byte)

    #We have a valid meta element we want to search for attributes
    while true
      #Try to find the next attribute after the current position
      attr = get_attribute

      return true if attr.nil?

      if attr[0] == 'charset'
        tentative_encoding = attr[1]
        if HTML5.is_valid_encoding(tentative_encoding)
          @encoding = tentative_encoding
          return false
        end
      elsif attr[0] == 'content'
        content_parser = ContentAttrParser.new(EncodingBytes.new(attr[1]))
        tentative_encoding = content_parser.parse
        if HTML5.is_valid_encoding(tentative_encoding)
          @encoding = tentative_encoding
          return false
        end
      end
    end
  end

  def handle_possible_start_tag
    return handle_possible_tag(false)
  end

  def handle_possible_end_tag
    @data.position += 1
    return handle_possible_tag(true)
  end

  # Skips over something that looks like a tag, including its attributes.
  # Always returns true (keep parsing).
  def handle_possible_tag(end_tag)
    unless ASCII_LETTERS.include?(@data.current_byte)
      #If the next byte is not an ascii letter either ignore this
      #fragment (possible start tag case) or treat it according to
      #handleOther
      if end_tag
        @data.position -= 1
        handle_other
      end
      return true
    end

    @data.find_next(SPACE_CHARACTERS + ['<', '>'])

    if @data.current_byte == '<'
      #return to the first step in the overall "two step" algorithm
      #reprocessing the < byte
      @data.position -= 1
    else
      #Read all attributes
      nil until get_attribute.nil?
    end
    return true
  end

  # Skips to the next '>'.
  def handle_other
    return @data.jump_to('>')
  end

  # Return a name,value pair for the next attribute in the stream,
  # if one is found, or nil. Names and values are lower-cased (ASCII only).
  def get_attribute
    @data.skip(SPACE_CHARACTERS + ['/'])

    if @data.current_byte == '<'
      @data.position -= 1
      return nil
    elsif @data.current_byte == '>'
      return nil
    end

    attr_name = []
    attr_value = []
    space_found = false
    #Step 5 attribute name
    while true
      # '=' only ends the name once the name is non-empty; a leading '='
      # is part of the name. (An Array is always truthy in Ruby, so the
      # emptiness has to be tested explicitly.)
      if @data.current_byte == '=' and not attr_name.empty?
        break
      elsif SPACE_CHARACTERS.include?(@data.current_byte)
        space_found = true
        break
      elsif ['/', '<', '>'].include?(@data.current_byte)
        return [attr_name.join(''), '']
      elsif ASCII_UPPERCASE.include?(@data.current_byte)
        attr_name.push(@data.current_byte.downcase)
      else
        attr_name.push(@data.current_byte)
      end
      #Step 6
      @data.position += 1
    end
    #Step 7
    if space_found
      @data.skip
      #Step 8
      unless @data.current_byte == '='
        @data.position -= 1
        return [attr_name.join(''), '']
      end
    end
    #XXX need to advance position in both spaces and value case
    #Step 9
    @data.position += 1
    #Step 10
    @data.skip
    #Step 11
    if ["'", '"'].include?(@data.current_byte)
      #11.1
      quote_char = @data.current_byte
      while true
        @data.position+=1
        #11.3
        if @data.current_byte == quote_char
          @data.position += 1
          return [attr_name.join(''), attr_value.join('')]
        #11.4
        elsif ASCII_UPPERCASE.include?(@data.current_byte)
          attr_value.push(@data.current_byte.downcase)
        #11.5
        else
          attr_value.push(@data.current_byte)
        end
      end
    elsif ['>', '<'].include?(@data.current_byte)
      return [attr_name.join(''), '']
    elsif ASCII_UPPERCASE.include?(@data.current_byte)
      attr_value.push(@data.current_byte.downcase)
    else
      attr_value.push(@data.current_byte)
    end
    # Unquoted value: runs up to the next space, '<' or '>'.
    while true
      @data.position += 1
      if (SPACE_CHARACTERS + ['>', '<']).include?(@data.current_byte)
        return [attr_name.join(''), attr_value.join('')]
      elsif ASCII_UPPERCASE.include?(@data.current_byte)
        attr_value.push(@data.current_byte.downcase)
      else
        attr_value.push(@data.current_byte)
      end
    end
  end
end
|
||||
|
||||
# Extracts the charset value from the text of a meta "content" attribute,
# e.g. "text/html; charset=utf-8".
class ContentAttrParser
  # data - the attribute value, wrapped in an object offering position,
  #        skip, jump_to, find_next and current_byte (an EncodingBytes)
  def initialize(data)
    @data = data
  end

  # Returns the charset value (without quotes), or nil when the attribute
  # holds no usable "; ... charset = value" part.
  def parse
    # Skip to the first ";"
    @data.position = 0
    @data.jump_to(';')
    @data.position += 1
    @data.skip

    # Move to the end of the next "charset" and past any spaces after it.
    @data.jump_to('charset')
    @data.position += 1
    @data.skip

    # Without an "=" there is nothing to extract.
    return nil unless @data.current_byte == '='
    @data.position += 1
    @data.skip

    if ['"', "'"].include?(@data.current_byte)
      # Quoted value: everything up to the matching quote mark.
      closing = @data.current_byte
      @data.position += 1
      start = @data.position
      @data.jump_to(closing)
      @data[start ... @data.position]
    else
      # Unquoted value: up to the next space, or to the end of the data.
      start = @data.position
      begin
        @data.find_next(SPACE_CHARACTERS)
        @data[start ... @data.position]
      rescue EOF
        @data[start .. -1]
      end
    end
  rescue EOF
    nil
  end
end
|
||||
|
||||
# Determine if a string is a supported encoding
|
||||
# True when +encoding+ is a String whose lower-cased, stripped form is
# listed in ENCODINGS; false for nil, non-strings and unknown names.
def self.is_valid_encoding(encoding)
  encoding.kind_of?(String) && ENCODINGS.include?(encoding.downcase.strip)
end
|
||||
|
||||
end
|
158
vendor/plugins/HTML5lib/lib/html5/liberalxmlparser.rb
vendored
Executable file
158
vendor/plugins/HTML5lib/lib/html5/liberalxmlparser.rb
vendored
Executable file
|
@ -0,0 +1,158 @@
|
|||
# Warning: this module is experimental and subject to change and even removal
|
||||
# at any time.
|
||||
#
|
||||
# For background/rationale, see:
|
||||
# * http://www.intertwingly.net/blog/2007/01/08/Xhtml5lib
|
||||
# * http://tinyurl.com/ylfj8k (and follow-ups)
|
||||
#
|
||||
# References:
|
||||
# * http://googlereader.blogspot.com/2005/12/xml-errors-in-feeds.html
|
||||
# * http://wiki.whatwg.org/wiki/HtmlVsXhtml
|
||||
#
|
||||
# @@TODO:
|
||||
# * Selectively lowercase only XHTML, but not foreign markup
|
||||
require 'html5/html5parser'
|
||||
require 'html5/constants'
|
||||
|
||||
module HTML5
|
||||
|
||||
# liberal XML parser
|
||||
class XMLParser < HTMLParser
|
||||
|
||||
# Build a liberal XML parser.
#
# options - passed through unchanged to the parent class constructor.
#
# After the parent constructor has run, the :initial phase is replaced by
# an XmlRootPhase instance bound to this parser and its tree.
def initialize(options = {})
  super options
  @phases[:initial] = XmlRootPhase.new(self, @tree)
end
|
||||
|
||||
# Normalize a tokenizer token for liberal XML parsing.
#
# token - Hash with a :type key and, depending on the type, :name and :data.
#
# * :StartTag / :EmptyTag - :data (a list of [name, value] pairs) is turned
#   into a Hash; for duplicated attribute names the first occurrence wins.
#   An :EmptyTag is additionally dispatched to the current phase as a start
#   tag (the tokenizer's content model flag is restored afterwards) and is
#   then rewritten into an :EndTag with empty attributes.
# * :Characters - while the tokenizer is in the :CDATA content model the
#   &lt;, &gt; and &amp; entity references are decoded.
# * :EndTag - a parse error is reported when the tag carries attributes.
# * :Comment - a comment of the form [CDATA[...]] becomes a :Characters
#   token holding the inner text.
#
# Returns the same token object, mutated in place.
def normalizeToken(token)
  case token[:type]
  when :StartTag, :EmptyTag
    # Reversing first makes the earliest duplicate the last one written,
    # so [["x", "y"], ["x", "z"]] becomes {"x" => "y"}.
    token[:data] = Hash[*token[:data].reverse.flatten]

    # For EmptyTags, process both a Start and an End tag
    if token[:type] == :EmptyTag
      save = @tokenizer.contentModelFlag
      @phase.processStartTag(token[:name], token[:data])
      @tokenizer.contentModelFlag = save
      token[:data] = {}
      token[:type] = :EndTag
    end

  when :Characters
    # un-escape RCDATA_ELEMENTS (e.g. style, script)
    if @tokenizer.contentModelFlag == :CDATA
      # &amp; is decoded last so that "&amp;lt;" yields "&lt;", not "<".
      token[:data] = token[:data].
        gsub('&lt;', '<').gsub('&gt;', '>').gsub('&amp;', '&')
    end

  when :EndTag
    # An empty attribute collection is truthy in Ruby, so test its content
    # rather than its mere presence.
    if token[:data] and not token[:data].empty?
      parseError(_("End tag contains unexpected attributes."))
    end

  when :Comment
    # Rescue CDATA from the comments
    if token[:data][0..6] == "[CDATA[" and token[:data][-2..-1] == "]]"
      token[:type] = :Characters
      token[:data] = token[:data][7 ... -2]
    end
  end

  return token
end
|
||||
end
|
||||
|
||||
# liberal XHTML parser
|
||||
class XHTMLParser < XMLParser

  # Build a liberal XHTML parser.
  #
  # options - passed through unchanged to the parent class constructor.
  #
  # Installs an InitialPhase as the :initial phase and an XhmlRootPhase as
  # the :rootElement phase.
  def initialize(options = {})
    super options
    @phases[:initial] = InitialPhase.new(self, @tree)
    @phases[:rootElement] = XhmlRootPhase.new(self, @tree)
  end

  # Apply the parent class normalization, then adjust end tags for XHTML.
  #
  # token - Hash with symbol keys (:type, :name, :data).
  #
  # Returns the same token object, mutated in place.
  def normalizeToken(token)
    super(token)

    # ensure that non-void XHTML elements have content so that separate
    # open and close tags are emitted
    if token[:type] == :EndTag
      if VOID_ELEMENTS.include? token[:name]
        # An end tag for a void element that is not the current node is
        # treated as an empty tag. Tokens are keyed by symbols, so the
        # lookups must use :name / :data.
        if @tree.openElements[-1].name != token[:name]
          token[:type] = :EmptyTag
          token[:data] ||= {}
        end
      else
        if token[:name] == @tree.openElements[-1].name and \
          not @tree.openElements[-1].hasContent
          # Skip the empty text node when any open element declares a
          # namespace other than XHTML.
          @tree.insertText('') unless
            @tree.openElements.any? {|e|
              e.attributes.keys.include? 'xmlns' and
              e.attributes['xmlns'] != 'http://www.w3.org/1999/xhtml'
            }
        end
      end
    end

    return token
  end
end
|
||||
|
||||
# Root element phase that creates the implied <html> element in the XHTML
# namespace.
class XhmlRootPhase < RootElementPhase
  # Create an "html" element carrying the XHTML xmlns attribute, push it on
  # the stack of open elements, append it to the document and switch the
  # parser to its :beforeHead phase.
  def insertHtmlElement
    element = @tree.createElement("html", {'xmlns' => 'http://www.w3.org/1999/xhtml'})
    @tree.openElements.push(element)
    @tree.document.appendChild(element)
    @parser.phase = @parser.phases[:beforeHead]
  end
end
|
||||
|
||||
class XmlRootPhase < Phase
  # Prime the Xml parser

  # Every start tag is routed to startTagOther and every end tag to
  # endTagOther (the Hash default is the handler name).
  @start_tag_handlers = Hash.new(:startTagOther)
  @end_tag_handlers = Hash.new(:endTagOther)

  # Handle the first start tag: put the document itself on the stack of open
  # elements, create the element as its child, push it, and hand over to a
  # fresh XmlElementPhase.
  #
  # name       - element name.
  # attributes - attributes for the new element.
  def startTagOther(name, attributes)
    @tree.openElements.push(@tree.document)
    element = @tree.createElement(name, attributes)
    @tree.openElements[-1].appendChild(element)
    @tree.openElements.push(element)
    @parser.phase = XmlElementPhase.new(@parser, @tree)
  end

  # Delegate to the parent handler, then pop the top of the stack of open
  # elements.
  #
  # name - element name of the end tag.
  def endTagOther(name)
    super
    @tree.openElements.pop
  end
end
|
||||
|
||||
class XmlElementPhase < Phase
  # Generic handling for all XML elements

  # Every start tag is routed to startTagOther and every end tag to
  # endTagOther (the Hash default is the handler name).
  @start_tag_handlers = Hash.new(:startTagOther)
  @end_tag_handlers = Hash.new(:endTagOther)

  # Create the element as a child of the current node and make it the new
  # current node.
  #
  # name       - element name.
  # attributes - attributes for the new element.
  def startTagOther(name, attributes)
    element = @tree.createElement(name, attributes)
    @tree.openElements[-1].appendChild(element)
    @tree.openElements.push(element)
  end

  # Close the nearest open element called +name+.
  #
  # The stack of open elements is searched from the top. One parse error is
  # reported for each element passed over before a match is found; on a
  # match, everything above it and the match itself are popped.
  #
  # name - element name of the end tag.
  def endTagOther(name)
    @tree.openElements.reverse.each do |node|
      if node.name == name
        # Pop until the matching node itself has been removed.
        nil while @tree.openElements.pop != node
        break
      else
        @parser.parseError
      end
    end
  end

  # Append character data to the current node.
  #
  # data - the text to insert.
  def processCharacters(data)
    @tree.insertText(data)
  end
end
|
||||
|
||||
end
|
188
vendor/plugins/HTML5lib/lib/html5/sanitizer.rb
vendored
Normal file
188
vendor/plugins/HTML5lib/lib/html5/sanitizer.rb
vendored
Normal file
|
@ -0,0 +1,188 @@
|
|||
require 'cgi'
|
||||
require 'html5/tokenizer'
|
||||
|
||||
module HTML5
|
||||
|
||||
# This module provides sanitization of XHTML+MathML+SVG
|
||||
# and of inline style attributes.
|
||||
#
|
||||
# It can be either at the Tokenizer stage:
|
||||
#
|
||||
# HTMLParser.parse(html, :tokenizer => HTMLSanitizer)
|
||||
#
|
||||
# or, if you already have a parse tree (in this example, a REXML tree),
|
||||
# at the Serializer stage:
|
||||
#
|
||||
# tokens = TreeWalkers.getTreeWalker('rexml').new(tree)
|
||||
# HTMLSerializer.serialize(tokens, {:encoding=>'utf-8',
|
||||
# :sanitize => true})
|
||||
|
||||
module HTMLSanitizeModule

  ACCEPTABLE_ELEMENTS = %w[a abbr acronym address area b big blockquote br
  button caption center cite code col colgroup dd del dfn dir div dl dt
  em fieldset font form h1 h2 h3 h4 h5 h6 hr i img input ins kbd label
  legend li map menu ol optgroup option p pre q s samp select small span
  strike strong sub sup table tbody td textarea tfoot th thead tr tt u
  ul var]

  MATHML_ELEMENTS = %w[maction math merror mfrac mi mmultiscripts mn mo
  mover mpadded mphantom mprescripts mroot mrow mspace msqrt mstyle msub
  msubsup msup mtable mtd mtext mtr munder munderover none]

  SVG_ELEMENTS = %w[a animate animateColor animateMotion animateTransform
  circle defs desc ellipse font-face font-face-name font-face-src g
  glyph hkern image linearGradient line marker metadata missing-glyph
  mpath path polygon polyline radialGradient rect set stop svg switch
  text title tspan use]

  ACCEPTABLE_ATTRIBUTES = %w[abbr accept accept-charset accesskey action
  align alt axis border cellpadding cellspacing char charoff charset
  checked cite class clear cols colspan color compact coords datetime
  dir disabled enctype for frame headers height href hreflang hspace id
  ismap label lang longdesc maxlength media method multiple name nohref
  noshade nowrap prompt readonly rel rev rows rowspan rules scope
  selected shape size span src start style summary tabindex target title
  type usemap valign value vspace width xml:lang]

  MATHML_ATTRIBUTES = %w[actiontype align columnalign columnalign
  columnalign columnlines columnspacing columnspan depth display
  displaystyle equalcolumns equalrows fence fontstyle fontweight frame
  height linethickness lspace mathbackground mathcolor mathvariant
  mathvariant maxsize minsize other rowalign rowalign rowalign rowlines
  rowspacing rowspan rspace scriptlevel selection separator stretchy
  width width xlink:href xlink:show xlink:type xmlns xmlns:xlink]

  SVG_ATTRIBUTES = %w[accent-height accumulate additive alphabetic
  arabic-form ascent attributeName attributeType baseProfile bbox begin
  by calcMode cap-height class color color-rendering content cx cy d dx
  dy descent display dur end fill fill-rule font-family font-size
  font-stretch font-style font-variant font-weight from fx fy g1 g2
  glyph-name gradientUnits hanging height horiz-adv-x horiz-origin-x id
  ideographic k keyPoints keySplines keyTimes lang marker-end
  marker-mid marker-start markerHeight markerUnits markerWidth
  mathematical max min name offset opacity orient origin
  overline-position overline-thickness panose-1 path pathLength points
  preserveAspectRatio r refX refY repeatCount repeatDur
  requiredExtensions requiredFeatures restart rotate rx ry slope stemh
  stemv stop-color stop-opacity strikethrough-position
  strikethrough-thickness stroke stroke-dasharray stroke-dashoffset
  stroke-linecap stroke-linejoin stroke-miterlimit stroke-opacity
  stroke-width systemLanguage target text-anchor to transform type u1
  u2 underline-position underline-thickness unicode unicode-range
  units-per-em values version viewBox visibility width widths x
  x-height x1 x2 xlink:actuate xlink:arcrole xlink:href xlink:role
  xlink:show xlink:title xlink:type xml:base xml:lang xml:space xmlns
  xmlns:xlink y y1 y2 zoomAndPan]

  ATTR_VAL_IS_URI = %w[href src cite action longdesc xlink:href xml:base]

  ACCEPTABLE_CSS_PROPERTIES = %w[azimuth background-color
  border-bottom-color border-collapse border-color border-left-color
  border-right-color border-top-color clear color cursor direction
  display elevation float font font-family font-size font-style
  font-variant font-weight height letter-spacing line-height overflow
  pause pause-after pause-before pitch pitch-range richness speak
  speak-header speak-numeral speak-punctuation speech-rate stress
  text-align text-decoration text-indent unicode-bidi vertical-align
  voice-family volume white-space width]

  ACCEPTABLE_CSS_KEYWORDS = %w[auto aqua black block blue bold both bottom
  brown center collapse dashed dotted fuchsia gray green !important
  italic left lime maroon medium none navy normal nowrap olive pointer
  purple red right solid silver teal top transparent underline white
  yellow]

  ACCEPTABLE_SVG_PROPERTIES = %w[fill fill-opacity fill-rule stroke
  stroke-width stroke-linecap stroke-linejoin stroke-opacity]

  ACCEPTABLE_PROTOCOLS = %w[ed2k ftp http https irc mailto news gopher nntp
  telnet webcal xmpp callto feed urn aim rsync tag ssh sftp rtsp afs]

  # subclasses may define their own versions of these constants
  ALLOWED_ELEMENTS = ACCEPTABLE_ELEMENTS + MATHML_ELEMENTS + SVG_ELEMENTS
  ALLOWED_ATTRIBUTES = ACCEPTABLE_ATTRIBUTES + MATHML_ATTRIBUTES + SVG_ATTRIBUTES
  ALLOWED_CSS_PROPERTIES = ACCEPTABLE_CSS_PROPERTIES
  ALLOWED_CSS_KEYWORDS = ACCEPTABLE_CSS_KEYWORDS
  ALLOWED_SVG_PROPERTIES = ACCEPTABLE_SVG_PROPERTIES
  ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS

  # Sanitize a single token.
  #
  # token - Hash with :type and, for tags, :name and optionally :data
  #         (a list of [attribute, value] pairs).
  #
  # * Tags whose name is in ALLOWED_ELEMENTS keep only attributes listed in
  #   ALLOWED_ATTRIBUTES. URI-valued attributes (ATTR_VAL_IS_URI) whose
  #   scheme is not in ALLOWED_PROTOCOLS are dropped, and a style attribute
  #   is passed through sanitize_css.
  # * Any other tag is turned into a :Characters token holding the tag's
  #   markup as text (attribute values HTML-escaped); :name is removed.
  # * Comments have their data blanked.
  # * Every other token type is returned untouched.
  #
  # Returns the same token object, mutated in place.
  def sanitize_token(token)
    case token[:type]
    when :StartTag, :EndTag, :EmptyTag
      if ALLOWED_ELEMENTS.include?(token[:name])
        if token.has_key? :data
          attrs = Hash[*token[:data].flatten]
          attrs.delete_if { |attr, v| !ALLOWED_ATTRIBUTES.include?(attr) }
          ATTR_VAL_IS_URI.each do |attr|
            # Strip backticks, ASCII control characters and whitespace, and
            # U+0080..U+00A0 before looking at the scheme, so that e.g.
            # "java\tscript:" cannot slip through.
            val_unescaped = CGI.unescapeHTML(attrs[attr].to_s).
              gsub(/[`\x00-\x20\x7f\u0080-\u00a0]+/, '').downcase
            if val_unescaped =~ /\A[a-z0-9][-+.a-z0-9]*:/ and
               !ALLOWED_PROTOCOLS.include?(val_unescaped.split(':')[0])
              attrs.delete attr
            end
          end
          if attrs['style']
            attrs['style'] = sanitize_css(attrs['style'])
          end
          token[:data] = attrs.map { |k, v| [k, v] }
        end
        return token
      else
        if token[:type] == :EndTag
          token[:data] = "</#{token[:name]}>"
        elsif token[:data]
          attrs = token[:data].map { |k, v| " #{k}=\"#{CGI.escapeHTML(v)}\"" }.join('')
          token[:data] = "<#{token[:name]}#{attrs}>"
        else
          token[:data] = "<#{token[:name]}>"
        end
        # Self-closing form: put the slash just before the closing ">".
        token[:data].insert(-2, '/') if token[:type] == :EmptyTag
        token[:type] = :Characters
        token.delete(:name)
        return token
      end
    when :Comment
      token[:data] = ""
      return token
    else
      return token
    end
  end

  # Sanitize the value of an inline style attribute.
  #
  # style - the raw CSS declaration list (converted with to_s).
  #
  # url(...) references are removed first. The whole value must then pass
  # two whole-string "gauntlet" patterns or the result is the empty string.
  # Surviving declarations are kept when the property is in
  # ALLOWED_CSS_PROPERTIES or ALLOWED_SVG_PROPERTIES, or when it is a
  # background/border/margin/padding property whose every keyword is either
  # in ALLOWED_CSS_KEYWORDS or looks like a colour or a short length.
  #
  # Returns the cleaned declarations joined with single spaces.
  def sanitize_css(style)
    # disallow urls
    style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')

    # gauntlet
    # The patterns are anchored to the whole string. Ruby's ^ and $ match
    # at every line boundary, which would let a multi-line value pass as
    # soon as any single line (even an empty one) looked harmless.
    return '' unless style =~ /\A([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/
    # \Z still tolerates one trailing newline after the last declaration.
    return '' unless style =~ /\A(\s*[-\w]+\s*:\s*[^:;]*(;|\Z))*\Z/

    clean = []
    style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop, val|
      next if val.empty?
      prop = prop.downcase
      if ALLOWED_CSS_PROPERTIES.include?(prop)
        clean << "#{prop}: #{val};"
      elsif %w[background border margin padding].include?(prop.split('-')[0])
        rejected = val.split.any? do |keyword|
          !ALLOWED_CSS_KEYWORDS.include?(keyword) and
            keyword !~ /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
        end
        clean << "#{prop}: #{val};" unless rejected
      elsif ALLOWED_SVG_PROPERTIES.include?(prop)
        clean << "#{prop}: #{val};"
      end
    end

    clean.join(' ')
  end
end
|
||||
|
||||
# Tokenizer that passes every token through sanitize_token before yielding it.
class HTMLSanitizer < HTMLTokenizer
  include HTMLSanitizeModule

  # Iterate over the parent tokenizer's tokens, yielding the sanitized
  # version of each one.
  def each
    super do |token|
      yield(sanitize_token(token))
    end
  end
end
|
||||
|
||||
end
|
2
vendor/plugins/HTML5lib/lib/html5/serializer.rb
vendored
Normal file
2
vendor/plugins/HTML5lib/lib/html5/serializer.rb
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
require 'html5/serializer/htmlserializer'
|
||||
require 'html5/serializer/xhtmlserializer'
|
178
vendor/plugins/HTML5lib/lib/html5/serializer/htmlserializer.rb
vendored
Normal file
178
vendor/plugins/HTML5lib/lib/html5/serializer/htmlserializer.rb
vendored
Normal file
|
@ -0,0 +1,178 @@
|
|||
require 'html5/constants'
|
||||
|
||||
module HTML5
|
||||
|
||||
# Turns a stream of tokens (Hashes with :type, :name, :data) into markup.
class HTMLSerializer

  # Serialize +stream+ with a one-off serializer.
  #
  # stream  - any object whose #each yields tokens.
  # options - Hash of serializer options (see #initialize); its :encoding
  #           entry is also passed to #serialize.
  #
  # Returns the serialized String.
  def self.serialize(stream, options = {})
    new(options).serialize(stream, options[:encoding])
  end

  # Escape the characters that are significant in text content.
  #
  # string - the text to escape.
  #
  # Returns a new String with & < > replaced by &amp; &lt; &gt;.
  def escape(string)
    # & must be handled first so the other entities are not escaped twice.
    string.gsub("&", "&amp;").gsub("<", "&lt;").gsub(">", "&gt;")
  end

  # options - Hash overriding any of the defaults assigned below. Keys that
  #           do not correspond to one of these settings are ignored.
  #           Passing :quote_char turns :use_best_quote_char off.
  def initialize(options={})
    @quote_attr_values = false
    @quote_char = '"'
    @use_best_quote_char = true
    @minimize_boolean_attributes = true

    @use_trailing_solidus = false
    @space_before_trailing_solidus = true
    @escape_lt_in_attrs = false
    @escape_rcdata = false

    @omit_optional_tags = true
    @sanitize = false

    @strip_whitespace = false

    @inject_meta_charset = true

    # instance_variables returns Strings on Ruby 1.8 and Symbols on later
    # versions; compare as Strings so that options are honoured on both.
    known = instance_variables.map { |ivar| ivar.to_s }
    options.each do |name, value|
      next unless known.include?("@#{name}")
      @use_best_quote_char = false if name.to_s == 'quote_char'
      instance_variable_set("@#{name}", value)
    end

    @errors = []
  end

  # Serialize a token stream.
  #
  # treewalker - any object whose #each yields tokens.
  # encoding   - optional output encoding name. When given and different
  #              from 'utf-8' the result is transcoded from UTF-8.
  #
  # Depending on the options, the stream is first wrapped in the
  # meta-charset, whitespace, sanitizer and optional-tag filters (each
  # loaded on demand). Problems are recorded through #serializeError.
  #
  # Returns the serialized String.
  def serialize(treewalker, encoding=nil)
    in_cdata = false
    @errors = []

    if encoding and @inject_meta_charset
      require 'html5/filters/inject_meta_charset'
      treewalker = Filters::InjectMetaCharset.new(treewalker, encoding)
    end

    if @strip_whitespace
      require 'html5/filters/whitespace'
      treewalker = Filters::WhitespaceFilter.new(treewalker)
    end

    if @sanitize
      require 'html5/filters/sanitizer'
      treewalker = Filters::HTMLSanitizeFilter.new(treewalker)
    end

    if @omit_optional_tags
      require 'html5/filters/optionaltags'
      treewalker = Filters::OptionalTagFilter.new(treewalker)
    end

    result = []
    treewalker.each do |token|
      case token[:type]
      when :Doctype
        result << "<!DOCTYPE %s>" % token[:name]

      when :Characters, :SpaceCharacters
        if token[:type] == :SpaceCharacters or in_cdata
          if in_cdata and token[:data].include?("</")
            serializeError(_("Unexpected </ in CDATA"))
          end
          # Whitespace and raw-text content are emitted verbatim.
          result << token[:data]
        else
          result << escape(token[:data])
        end

      when :StartTag, :EmptyTag
        name = token[:name]
        if RCDATA_ELEMENTS.include?(name) and not @escape_rcdata
          in_cdata = true
        elsif in_cdata
          serializeError(_("Unexpected child element of a CDATA element"))
        end

        attributes = []
        token[:data].to_a.sort.each do |k, v|
          attributes << ' '
          attributes << k

          # A minimized boolean attribute is written as its bare name.
          next if @minimize_boolean_attributes and
            ((BOOLEAN_ATTRIBUTES[name] || []).include?(k) or
             BOOLEAN_ATTRIBUTES[:global].include?(k))

          attributes << "="
          if @quote_attr_values or v.empty?
            quote_attr = true
          else
            quote_attr = (SPACE_CHARACTERS + %w(< > " ')).any? { |c| v.include?(c) }
          end
          v = v.gsub("&", "&amp;")
          v = v.gsub("<", "&lt;") if @escape_lt_in_attrs
          if quote_attr
            quote_char = @quote_char
            if @use_best_quote_char
              # Prefer the quote character the value does not contain.
              if v.index("'") and !v.index('"')
                quote_char = '"'
              elsif v.index('"') and !v.index("'")
                quote_char = "'"
              end
            end
            if quote_char == "'"
              v = v.gsub("'", "&#39;")
            else
              v = v.gsub('"', "&quot;")
            end
            attributes << quote_char << v << quote_char
          else
            attributes << v
          end
        end

        if VOID_ELEMENTS.include?(name) and @use_trailing_solidus
          if @space_before_trailing_solidus
            attributes << " /"
          else
            attributes << "/"
          end
        end
        result << "<%s%s>" % [name, attributes.join('')]

      when :EndTag
        name = token[:name]
        if RCDATA_ELEMENTS.include?(name)
          in_cdata = false
        elsif in_cdata
          serializeError(_("Unexpected child element of a CDATA element"))
        end
        result << "</#{name}>"

      when :Comment
        data = token[:data]
        serializeError(_("Comment contains --")) if data.index("--")
        result << "<!--%s-->" % token[:data]

      else
        serializeError(token[:data])
      end
    end

    output = result.join('')
    return output unless encoding and encoding != 'utf-8'

    if output.respond_to?(:encode)
      # Rubies with built-in transcoding no longer ship Iconv.
      output.encode(encoding, 'utf-8')
    else
      require 'iconv'
      Iconv.iconv(encoding, 'utf-8', output).first
    end
  end

  alias :render :serialize

  # Record a serialization problem.
  #
  # data - description of the problem.
  #
  # Raises SerializeError when @strict is set.
  def serializeError(data="XXX ERROR MESSAGE NEEDED")
    # XXX The idea is to make data mandatory.
    @errors.push(data)
    if @strict
      raise SerializeError
    end
  end
end
|
||||
|
||||
# Error in serialized tree
|
||||
# Raised by HTMLSerializer#serializeError when the serializer is strict.
class SerializeError < Exception
end
|
||||
end
|
20
vendor/plugins/HTML5lib/lib/html5/serializer/xhtmlserializer.rb
vendored
Normal file
20
vendor/plugins/HTML5lib/lib/html5/serializer/xhtmlserializer.rb
vendored
Normal file
|
@ -0,0 +1,20 @@
|
|||
require 'html5/serializer/htmlserializer'
|
||||
|
||||
module HTML5
|
||||
|
||||
# HTMLSerializer preconfigured with XHTML-friendly option defaults.
class XHTMLSerializer < HTMLSerializer
  # Option values applied before the caller's own options.
  DEFAULTS = {
    :quote_attr_values => true,
    :minimize_boolean_attributes => false,
    :use_trailing_solidus => true,
    :escape_lt_in_attrs => true,
    :omit_optional_tags => false,
    :escape_rcdata => true
  }

  # options - Hash of serializer options; entries given here take
  #           precedence over DEFAULTS.
  def initialize(options={})
    super(DEFAULTS.clone.update(options))
  end
end
|
||||
|
||||
end
|
1065
vendor/plugins/HTML5lib/lib/html5/tokenizer.rb
vendored
Normal file
1065
vendor/plugins/HTML5lib/lib/html5/tokenizer.rb
vendored
Normal file
File diff suppressed because it is too large
Load diff
24
vendor/plugins/HTML5lib/lib/html5/treebuilders.rb
vendored
Normal file
24
vendor/plugins/HTML5lib/lib/html5/treebuilders.rb
vendored
Normal file
|
@ -0,0 +1,24 @@
|
|||
module HTML5
  module TreeBuilders

    class << self
      # Look up a tree builder class by name, loading its file on demand.
      #
      # name - 'simpletree', 'rexml' or 'hpricot' (any case; converted
      #        with to_s).
      #
      # Returns the matching TreeBuilder class.
      # Raises RuntimeError for any other name.
      def [](name)
        case name.to_s.downcase
        when 'simpletree'
          require 'html5/treebuilders/simpletree'
          SimpleTree::TreeBuilder
        when 'rexml'
          require 'html5/treebuilders/rexml'
          REXML::TreeBuilder
        when 'hpricot'
          require 'html5/treebuilders/hpricot'
          Hpricot::TreeBuilder
        else
          raise "Unknown TreeBuilder #{name}"
        end
      end

      alias :getTreeBuilder :[]
    end
  end
end
|
330
vendor/plugins/HTML5lib/lib/html5/treebuilders/base.rb
vendored
Executable file
330
vendor/plugins/HTML5lib/lib/html5/treebuilders/base.rb
vendored
Executable file
|
@ -0,0 +1,330 @@
|
|||
require 'html5/constants'
|
||||
|
||||
#XXX - TODO; make the default interface more ElementTree-like rather than DOM-like
|
||||
|
||||
module HTML5
|
||||
|
||||
# The scope markers are inserted when entering buttons, object elements,
|
||||
# marquees, table cells, and table captions, and are used to prevent formatting
|
||||
# from "leaking" into tables, buttons, object elements, and marquees.
|
||||
Marker = nil
|
||||
|
||||
module TreeBuilders
|
||||
module Base
|
||||
|
||||
# Abstract tree node. Concrete tree builders subclass it and implement the
# methods that raise NotImplementedError here.
class Node
  # The parent of the current node (or nil for the document node)
  attr_accessor :parent

  # a list of child nodes of the current node. This must
  # include all elements but not necessarily other node types
  attr_accessor :childNodes

  # A list of miscellaneous flags that can be set on the node
  attr_accessor :_flags

  # name - accepted for subclasses; not stored by this base class.
  def initialize(name)
    @parent = nil
    @childNodes = []
    @_flags = []
  end

  # Insert node as a child of the current node
  def appendChild(node)
    raise NotImplementedError
  end

  # Insert data as text in the current node, positioned before the
  # start of node insertBefore or to the end of the node's text.
  def insertText(data, insertBefore=nil)
    raise NotImplementedError
  end

  # Insert node as a child of the current node, before refNode in the
  # list of child nodes.
  def insertBefore(node, refNode)
    raise NotImplementedError
  end

  # Remove node from the children of the current node
  def removeChild(node)
    raise NotImplementedError
  end

  # Move all the children of the current node to newParent.
  # This is needed so that trees that don't store text as nodes move the
  # text in the correct way
  def reparentChildren(newParent)
    #XXX - should this method be made more general?
    @childNodes.each { |child| newParent.appendChild(child) }
    @childNodes = []
  end

  # Return a shallow copy of the current node i.e. a node with the same
  # name and attributes but with no parent or child nodes
  def cloneNode
    raise NotImplementedError
  end

  # Return true if the node has children or text, false otherwise
  def hasContent
    raise NotImplementedError
  end
end
|
||||
|
||||
# Base treebuilder implementation
|
||||
# Base treebuilder implementation
class TreeBuilder

  attr_accessor :openElements

  attr_accessor :activeFormattingElements

  attr_accessor :document

  attr_accessor :headPointer

  attr_accessor :formPointer

  # The node classes are read from instance variables that this class never
  # assigns itself:
  #
  #   @documentClass - class to use for the document root
  #   @elementClass  - class to use for HTML elements
  #   @commentClass  - class to use for comments
  #   @doctypeClass  - class to use for doctypes
  #   @fragmentClass - class to use for fragments
  #
  # NOTE(review): concrete builders presumably assign them before #reset
  # runs (e.g. in their own initialize, prior to calling super) -- confirm
  # against the subclasses.

  def initialize
    reset
  end

  # Return the builder to its initial state: empty stacks, no head/form
  # pointers, normal (non-table) insertion and a fresh document node.
  def reset
    @openElements = []
    @activeFormattingElements = []

    #XXX - rename these to headElement, formElement
    @headPointer = nil
    @formPointer = nil

    self.insertFromTable = false

    @document = @documentClass.new
  end

  # Decide whether an element called +target+ is in scope.
  #
  # target       - element name to look for.
  # tableVariant - when true, only 'table' and 'html' end the search;
  #                otherwise every name in SCOPING_ELEMENTS does.
  #
  # Returns true or false.
  def elementInScope(target, tableVariant=false)
    # Exit early when possible.
    return true if @openElements[-1].name == target

    @openElements.reverse.each do |element|
      if element.name == target
        return true
      elsif element.name == 'table'
        return false
      elsif not tableVariant and SCOPING_ELEMENTS.include?(element.name)
        return false
      elsif element.name == 'html'
        return false
      end
    end

    # The whole stack was walked without meeting the target or a scope
    # boundary (possible when no html element is on the stack).
    false
  end

  def reconstructActiveFormattingElements
    # Within this algorithm the order of steps described in the
    # specification is not quite the same as the order of steps in the
    # code. It should still do the same though.

    # Step 1: stop the algorithm when there's nothing to do.
    return if @activeFormattingElements.empty?

    # Step 2 and step 3: we start with the last element. So i is -1.
    i = -1
    entry = @activeFormattingElements[i]
    return if entry == Marker or @openElements.include?(entry)

    # Step 6
    until entry == Marker or @openElements.include?(entry)
      # Step 5: let entry be one earlier in the list.
      i -= 1
      # Step 4: indexing before the start of the list yields nil, and nil
      # is the Marker, so the loop ends with i one position before the
      # first entry; step 7 then moves it onto the first entry.
      entry = @activeFormattingElements[i]
    end

    while true
      # Step 7
      i += 1

      # Step 8
      clone = @activeFormattingElements[i].cloneNode

      # Step 9
      element = insertElement(clone.name, clone.attributes)

      # Step 10
      @activeFormattingElements[i] = element

      # Step 11
      break if element == @activeFormattingElements[-1]
    end
  end

  # Pop active formatting elements up to and including the last Marker
  # (or until the list is empty).
  def clearActiveFormattingElements
    nil until @activeFormattingElements.empty? || @activeFormattingElements.pop == Marker
  end

  # Check if an element exists between the end of the active
  # formatting elements and the last marker. If it does, return it, else
  # return false
  def elementInActiveFormattingElements(name)
    @activeFormattingElements.reverse.each do |element|
      # Check for Marker first because if it's a Marker it doesn't have a
      # name attribute.
      break if element == Marker
      return element if element.name == name
    end
    return false
  end

  # Append a doctype node called +name+ to the document.
  def insertDoctype(name)
    @document.appendChild(@doctypeClass.new(name))
  end

  # Append a comment node holding +data+ to +parent+, or to the current
  # node when no parent is given.
  def insertComment(data, parent=nil)
    parent = @openElements[-1] if parent.nil?
    parent.appendChild(@commentClass.new(data))
  end

  # Create an element but don't insert it anywhere
  def createElement(name, attributes)
    element = @elementClass.new(name)
    element.attributes = attributes
    return element
  end

  # Switch the function used to insert an element from the
  # normal one to the misnested table one and back again
  def insertFromTable=(value)
    @insertFromTable = value
    @insertElement = value ? :insertElementTable : :insertElementNormal
  end

  # Create an element and insert it using the currently selected strategy
  # (see #insertFromTable=). Returns the new element.
  def insertElement(name, attributes)
    send(@insertElement, name, attributes)
  end

  # Create an element, append it to the current node and make it the new
  # current node. Returns the new element.
  def insertElementNormal(name, attributes)
    element = @elementClass.new(name)
    element.attributes = attributes
    @openElements[-1].appendChild(element)
    @openElements.push(element)
    return element
  end

  # Create an element and insert it into the tree
  def insertElementTable(name, attributes)
    unless TABLE_INSERT_MODE_ELEMENTS.include?(@openElements[-1].name)
      return insertElementNormal(name, attributes)
    end

    element = @elementClass.new(name)
    element.attributes = attributes
    #We should be in the InTable mode. This means we want to do
    #special magic element rearranging
    parent, insertBefore = getTableMisnestedNodePosition
    if insertBefore.nil?
      parent.appendChild(element)
    else
      parent.insertBefore(element, insertBefore)
    end
    @openElements.push(element)
    return element
  end

  # Insert text into +parent+ (the current node by default). While table
  # insertion is active and the current node is a table-structure element,
  # the text is foster-parented instead.
  def insertText(data, parent=nil)
    parent = @openElements[-1] if parent.nil?

    if !@insertFromTable or !TABLE_INSERT_MODE_ELEMENTS.include?(@openElements[-1].name)
      parent.insertText(data)
    else
      #We should be in the InTable mode. This means we want to do
      #special magic element rearranging
      parent, insertBefore = getTableMisnestedNodePosition
      parent.insertText(data, insertBefore)
    end
  end

  # Get the foster parent element, and sibling to insert before
  # (or nil) when inserting a misnested table node
  def getTableMisnestedNodePosition
    #The foster parent element is the one which comes before the most
    #recently opened table element
    #XXX - this is really inelegant
    fosterParent = nil
    insertBefore = nil
    lastTable = @openElements.reverse.find { |element| element.name == "table" }

    if lastTable
      #XXX - we should really check that this parent is actually a
      #node here
      if lastTable.parent
        fosterParent = lastTable.parent
        insertBefore = lastTable
      else
        fosterParent = @openElements[@openElements.index(lastTable) - 1]
      end
    else
      fosterParent = @openElements[0]
    end
    return fosterParent, insertBefore
  end

  # Pop dd, dt, li, p, td, th and tr elements off the stack of open
  # elements until the current node is something else or is +exclude+.
  def generateImpliedEndTags(exclude=nil)
    # XXX This is not entirely what the specification says. We should
    # investigate it more closely.
    loop do
      name = @openElements[-1].name
      break unless %w[dd dt li p td th tr].include?(name) and name != exclude
      @openElements.pop
    end
    nil
  end

  def getDocument
    @document
  end

  # Move the children of the first open element into a new fragment node
  # and return that fragment.
  def getFragment
    #assert @innerHTML
    fragment = @fragmentClass.new
    @openElements[0].reparentChildren(fragment)
    return fragment
  end

  # Serialize the subtree of node in the format required by unit tests
  # node - the node from which to start serializing
  def testSerializer(node)
    raise NotImplementedError
  end

end
|
||||
end
|
||||
end
|
||||
end
|
221
vendor/plugins/HTML5lib/lib/html5/treebuilders/hpricot.rb
vendored
Normal file
221
vendor/plugins/HTML5lib/lib/html5/treebuilders/hpricot.rb
vendored
Normal file
|
@ -0,0 +1,221 @@
|
|||
require 'html5/treebuilders/base'
|
||||
require 'rubygems'
|
||||
require 'hpricot'
|
||||
require 'forwardable'
|
||||
|
||||
module HTML5
|
||||
module TreeBuilders
|
||||
module Hpricot
|
||||
|
||||
# Tree node that mirrors every structural change onto a wrapped Hpricot
# object (@hpricot).
class Node < Base::Node

  extend Forwardable

  def_delegators :@hpricot, :name

  attr_accessor :hpricot

  # name - passed to the constructor of the subclass's hpricot_class.
  def initialize(name)
    super(name)
    @hpricot = self.class.hpricot_class.new name
  end

  # Append +node+ as the last child. A TextNode appended directly after
  # another TextNode is merged into it instead of being added.
  def appendChild(node)
    if node.kind_of?(TextNode) and childNodes.any? and childNodes.last.kind_of?(TextNode)
      childNodes[-1].hpricot.content = childNodes[-1].hpricot.to_s + node.hpricot.to_s
    else
      childNodes << node
      hpricot.children << node.hpricot
    end
    # Detach the wrapped object from its previous Hpricot parent, if any.
    if (oldparent = node.hpricot.parent) != nil
      oldparent.children.delete_at(oldparent.children.index(node.hpricot))
    end
    node.hpricot.parent = hpricot
    node.parent = self
  end

  # Remove +node+ from this node's children and clear its parent links.
  def removeChild(node)
    childNodes.delete(node)
    hpricot.children.delete_at(hpricot.children.index(node.hpricot))
    node.hpricot.parent = nil
    node.parent = nil
  end

  # Insert +data+ as a text node, before +before+ when given, otherwise at
  # the end.
  def insertText(data, before=nil)
    if before
      insertBefore(TextNode.new(data), before)
    else
      appendChild(TextNode.new(data))
    end
  end

  # Insert +node+ before +refNode+, which must be one of this node's
  # children. A TextNode inserted directly after another TextNode is merged
  # into it instead.
  def insertBefore(node, refNode)
    index = childNodes.index(refNode)
    if node.kind_of?(TextNode) and index > 0 and childNodes[index-1].kind_of?(TextNode)
      childNodes[index-1].hpricot.content = childNodes[index-1].hpricot.to_s + node.hpricot.to_s
    else
      refNode.hpricot.parent.insert_before(node.hpricot, refNode.hpricot)
      childNodes.insert(index, node)
    end
  end

  # True when the node has at least one child node.
  def hasContent
    childNodes.any?
  end
end
|
||||
|
||||
# Element node backed by an ::Hpricot::Elem.
class Element < Node
  def self.hpricot_class
    ::Hpricot::Elem
  end

  # After the inherited constructor has run, @hpricot is replaced with an
  # element built from an ::Hpricot::STag carrying the tag name.
  def initialize(name)
    super(name)

    start_tag = ::Hpricot::STag.new(name)
    @hpricot = ::Hpricot::Elem.new(start_tag)
  end

  # Tag name as recorded on the wrapped element's start tag.
  def name
    @hpricot.stag.name
  end

  # Returns a new node of the same class and name with every attribute
  # copied onto its Hpricot element; children are not copied.
  def cloneNode
    source_attributes = attributes
    copy = self.class.new(name)
    source_attributes.each do |attr_name, attr_value|
      copy.hpricot[attr_name] = attr_value
    end
    copy
  end

  # A call to Hpricot::Elem#raw_attributes is built dynamically,
  # so alterations to the returned value (a hash) will be lost.
  #
  # AttributeProxy works around this by forwarding :[]= calls
  # to the raw_attributes accessor on the element start tag.
  #
  class AttributeProxy
    def initialize(hpricot)
      @hpricot = hpricot
    end

    # Writes straight into the start tag's attribute hash.
    def []=(k, v)
      target = @hpricot.stag.send(stag_attributes_method)
      target[k] = v
    end

    # STag#attributes changed to STag#raw_attributes after Hpricot 0.5
    def stag_attributes_method
      if @hpricot.stag.respond_to?(:raw_attributes)
        :raw_attributes
      else
        :attributes
      end
    end

    # Everything else is answered by the element's attributes object.
    def method_missing(*a, &b)
      @hpricot.attributes.send(*a, &b)
    end
  end

  # Returns a fresh AttributeProxy around the wrapped element.
  def attributes
    AttributeProxy.new(@hpricot)
  end

  # Sets each name/value pair of attrs on the wrapped element.
  def attributes=(attrs)
    attrs.each do |attr_name, attr_value|
      @hpricot[attr_name] = attr_value
    end
  end

  # Test-format dump: the tag line, then attributes (except 'xmlns') and
  # children indented two further spaces.
  def printTree(indent=0)
    output = "\n|#{' ' * indent}<#{name}>"
    inner = indent + 2
    attributes.each do |attr_name, attr_value|
      next if attr_name == 'xmlns'
      output += "\n|#{' ' * inner}#{attr_name}=\"#{attr_value}\""
    end
    childNodes.each { |child| output += child.printTree(inner) }
    output
  end
end
|
||||
|
||||
# Document node backed by an ::Hpricot::Doc.
class Document < Node
  def self.hpricot_class
    ::Hpricot::Doc
  end

  # Documents have no name, so nil is passed up.
  def initialize
    super(nil)
  end

  # Test-format dump: '#document' followed by each child's dump, indented
  # two spaces beyond indent.
  def printTree(indent=0)
    output = '#document'
    childNodes.each { |child| output += child.printTree(indent + 2) }
    output
  end
end
|
||||
|
||||
# Doctype node backed by an ::Hpricot::DocType.
class DocumentType < Node
  def self.hpricot_class
    ::Hpricot::DocType
  end

  def initialize(name)
    begin
      super(name)
    rescue ArgumentError
      # Deliberately ignored (original note: "needs 3..."). Presumably the
      # inherited constructor's one-argument hpricot_class.new call fails
      # for DocType, which is built with three arguments below.
      nil
    end

    @hpricot = ::Hpricot::DocType.new(name, nil, nil)
  end

  # Test-format dump of the doctype, using the Hpricot object's target.
  def printTree(indent=0)
    padding = ' ' * indent
    "\n|#{padding}<!DOCTYPE #{hpricot.target}>"
  end
end
|
||||
|
||||
# Fragment container: an Element created with an empty tag name.
class DocumentFragment < Element
  def initialize
    super('')
  end

  # Test-format dump: only the children are printed, each indented two
  # spaces beyond indent.
  def printTree(indent=0)
    output = ''
    childNodes.each { |child| output += child.printTree(indent+2) }
    output
  end
end
|
||||
|
||||
# Text node backed by an ::Hpricot::Text.
class TextNode < Node
  # Only wraps data in an ::Hpricot::Text; the inherited constructor is
  # not invoked here.
  def initialize(data)
    @hpricot = ::Hpricot::Text.new(data)
  end

  # Test-format dump: the text content in double quotes.
  def printTree(indent=0)
    padding = ' ' * indent
    "\n|#{padding}\"#{hpricot.content}\""
  end
end
|
||||
|
||||
# Comment node backed by an ::Hpricot::Comment.
class CommentNode < Node
  def self.hpricot_class
    ::Hpricot::Comment
  end

  # Test-format dump: the comment content between "<!-- " and " -->".
  def printTree(indent=0)
    padding = ' ' * indent
    "\n|#{padding}<!-- #{hpricot.content} -->"
  end
end
|
||||
|
||||
# Tree builder that produces Hpricot-backed nodes.
class TreeBuilder < Base::TreeBuilder
  # Records which node classes of this module the builder should use.
  def initialize
    @fragmentClass = DocumentFragment
    @commentClass = CommentNode
    @elementClass = Element
    @doctypeClass = DocumentType
    @documentClass = Document
  end

  # Serializes node in the unit-test tree format.
  def testSerializer(node)
    node.printTree
  end

  # Returns the underlying Hpricot object of the document node.
  def getDocument
    @document.hpricot
  end

  # Stores the fragment built by the superclass in @document and returns
  # the children of its underlying Hpricot object.
  def getFragment
    fragment = super
    @document = fragment
    fragment.hpricot.children
  end
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
192
vendor/plugins/HTML5lib/lib/html5/treebuilders/rexml.rb
vendored
Normal file
192
vendor/plugins/HTML5lib/lib/html5/treebuilders/rexml.rb
vendored
Normal file
|
@ -0,0 +1,192 @@
|
|||
require 'html5/treebuilders/base'
|
||||
require 'rexml/document'
|
||||
require 'forwardable'
|
||||
|
||||
module HTML5
|
||||
module TreeBuilders
|
||||
module REXML
|
||||
|
||||
# Wrapper node that keeps its own childNodes list and mirrors structural
# changes onto the wrapped REXML object held in @rxobj.
class Node < Base::Node
  extend Forwardable
  def_delegators :@rxobj, :name, :attributes
  attr_accessor :rxobj

  # name - forwarded to Base::Node#initialize and to the constructor of
  #        the subclass's rxclass.
  def initialize name
    super name
    @rxobj = self.class.rxclass.new name
  end

  # Appends node as the last child. A TextNode appended directly after
  # another TextNode is merged into that sibling (which is then flagged
  # raw) instead of being added.
  def appendChild node
    if node.kind_of?(TextNode) &&
        childNodes.length > 0 && childNodes[-1].kind_of?(TextNode)
      childNodes[-1].rxobj.value =
        childNodes[-1].rxobj.to_s + node.rxobj.to_s
      childNodes[-1].rxobj.raw = true
    else
      childNodes.push node
      rxobj.add node.rxobj
    end
    node.parent = self
  end

  # Removes node from both child lists and clears its parent link.
  def removeChild node
    childNodes.delete node
    rxobj.delete node.rxobj
    node.parent = nil
  end

  # Inserts data as a TextNode, before the given child when one is
  # supplied, otherwise at the end.
  def insertText data, before=nil
    if before
      insertBefore TextNode.new(data), before
    else
      appendChild TextNode.new(data)
    end
  end

  # Inserts node immediately before refNode (which must be a child).
  # A TextNode inserted right after an existing TextNode is merged into
  # that sibling instead.
  def insertBefore node, refNode
    index = childNodes.index(refNode)
    if node.kind_of?(TextNode) && index > 0 &&
        childNodes[index-1].kind_of?(TextNode)
      childNodes[index-1].rxobj.value =
        childNodes[index-1].rxobj.to_s + node.rxobj.to_s
      childNodes[index-1].rxobj.raw = true
    else
      childNodes.insert index, node
      # Keep the wrapper's parent link in step with appendChild.
      node.parent = self
      refNode.rxobj.parent.insert_before(refNode.rxobj,node.rxobj)
    end
  end

  # True when this node has at least one child.
  def hasContent
    return (childNodes.length > 0)
  end
end
|
||||
|
||||
# Element node backed by a ::REXML::Element.
class Element < Node
  def self.rxclass
    ::REXML::Element
  end

  def initialize name
    super(name)
  end

  # Returns a new node of the same class and name with every attribute
  # copied; children are not copied.
  def cloneNode
    copy = self.class.new(name)
    attributes.each do |attr_name, attr_value|
      copy.attributes[attr_name] = attr_value
    end
    copy
  end

  # Sets each name/value pair of value on the wrapped element.
  def attributes= value
    value.each do |attr_name, attr_value|
      rxobj.attributes[attr_name] = attr_value
    end
  end

  # Test-format dump: the tag line, then attributes (except 'xmlns') and
  # children indented two further spaces.
  def printTree indent=0
    output = "\n|#{' ' * indent}<#{name}>"
    inner = indent + 2
    attributes.each do |attr_name, attr_value|
      next if attr_name == 'xmlns'
      output += "\n|#{' ' * inner}#{attr_name}=\"#{attr_value}\""
    end
    childNodes.each { |child| output += child.printTree(inner) }
    output
  end
end
|
||||
|
||||
# Document node backed by a ::REXML::Document.
class Document < Node
  def self.rxclass
    ::REXML::Document
  end

  # Documents have no name, so nil is passed up.
  def initialize
    super(nil)
  end

  # Adds the XHTML namespace to an Element named 'html' before appending
  # it in the usual way.
  def appendChild node
    is_html_root = node.kind_of?(Element) && node.name == 'html'
    node.rxobj.add_namespace('http://www.w3.org/1999/xhtml') if is_html_root
    super(node)
  end

  # Test-format dump: "#document" followed by each child's dump, indented
  # two spaces beyond indent.
  def printTree indent=0
    output = "#document"
    childNodes.each { |child| output += child.printTree(indent + 2) }
    output
  end
end
|
||||
|
||||
# Doctype node backed by a ::REXML::DocType.
class DocumentType < Node
  def self.rxclass
    ::REXML::DocType
  end

  # Test-format dump of the doctype, using the delegated name.
  def printTree indent=0
    padding = ' ' * indent
    "\n|#{padding}<!DOCTYPE #{name}>"
  end
end
|
||||
|
||||
# Fragment container: an Element created with a nil name.
class DocumentFragment < Element
  def initialize
    super(nil)
  end

  # Test-format dump: only the children are printed, each indented two
  # spaces beyond indent.
  def printTree indent=0
    output = ""
    childNodes.each { |child| output += child.printTree(indent+2) }
    output
  end
end
|
||||
|
||||
# Text node backed by a ::REXML::Text.
class TextNode < Node
  # data - the unescaped character data.
  #
  # The REXML::Text is created with its raw flag (fourth argument) set,
  # so the markup-significant characters are escaped here first. The
  # ampersand must be replaced first so that the entities produced for
  # '<' and '>' are not escaped a second time.
  # The inherited constructor is not invoked here.
  def initialize data
    raw = data.gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
    @rxobj = ::REXML::Text.new(raw, true, nil, true)
  end

  # Test-format dump: the text value in double quotes.
  def printTree indent=0
    "\n|#{' ' * indent}\"#{rxobj.value}\""
  end
end
|
||||
|
||||
# Comment node backed by a ::REXML::Comment.
class CommentNode < Node
  def self.rxclass
    ::REXML::Comment
  end

  # Test-format dump: the comment string between "<!-- " and " -->".
  def printTree indent=0
    padding = ' ' * indent
    "\n|#{padding}<!-- #{rxobj.string} -->"
  end
end
|
||||
|
||||
# Tree builder that produces REXML-backed nodes.
class TreeBuilder < Base::TreeBuilder
  # Records which node classes of this module the builder should use.
  def initialize
    @fragmentClass = DocumentFragment
    @commentClass = CommentNode
    @elementClass = Element
    @doctypeClass = DocumentType
    @documentClass = Document
  end

  # Serializes node in the unit-test tree format.
  def testSerializer node
    node.printTree
  end

  # Returns the underlying REXML object of the document node.
  def getDocument
    @document.rxobj
  end

  # Stores the fragment built by the superclass in @document and returns
  # the children of its underlying REXML object.
  def getFragment
    fragment = super
    @document = fragment
    fragment.rxobj.children
  end
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
178
vendor/plugins/HTML5lib/lib/html5/treebuilders/simpletree.rb
vendored
Normal file
178
vendor/plugins/HTML5lib/lib/html5/treebuilders/simpletree.rb
vendored
Normal file
|
@ -0,0 +1,178 @@
|
|||
require 'html5/treebuilders/base'
|
||||
|
||||
module HTML5
|
||||
module TreeBuilders
|
||||
module SimpleTree
|
||||
|
||||
# Node representing an item in the tree.
class Node < Base::Node
  # name - The tag name associated with the node
  attr_accessor :name

  # The value of the current node (applies to text nodes and comments)
  attr_accessor :value

  # a hash holding name, value pairs for attributes of the node
  attr_accessor :attributes

  def initialize name
    super
    @name = name
    @value = nil
    @attributes = {}
  end

  # Appends node as the last child. A TextNode appended directly after
  # another TextNode has its value merged into that sibling instead.
  def appendChild node
    if node.kind_of?(TextNode) &&
        childNodes.length > 0 && childNodes[-1].kind_of?(TextNode)
      childNodes[-1].value += node.value
    else
      childNodes.push node
    end
    node.parent = self
  end

  # Removes node from the child list and clears its parent link.
  def removeChild node
    childNodes.delete node
    node.parent = nil
  end

  # Returns a new node of the same class with the same name, attributes
  # and value; children are not copied.
  def cloneNode
    newNode = self.class.new name
    attributes.each { |attr_name, attr_value| newNode.attributes[attr_name] = attr_value }
    newNode.value = value
    newNode
  end

  # Inserts data as a TextNode, before the given child when one is
  # supplied, otherwise at the end.
  def insertText data, before=nil
    if before
      insertBefore TextNode.new(data), before
    else
      appendChild TextNode.new(data)
    end
  end

  # Inserts node immediately before refNode (which must be a child).
  # A TextNode inserted right after an existing TextNode is merged into
  # that sibling instead.
  def insertBefore node, refNode
    index = childNodes.index(refNode)
    if node.kind_of?(TextNode) && index > 0 &&
        childNodes[index-1].kind_of?(TextNode)
      childNodes[index-1].value += node.value
    else
      # Keep the parent link in step with appendChild.
      node.parent = self
      childNodes.insert index, node
    end
  end

  # Test-format dump: this node's to_s on its own line, followed by each
  # child's dump indented two further spaces.
  def printTree indent=0
    tree = "\n|%s%s" % [' '* indent, self.to_s]
    childNodes.each { |child| tree += child.printTree(indent + 2) }
    return tree
  end

  # True when this node has at least one child.
  def hasContent
    return (childNodes.length > 0)
  end
end
|
||||
|
||||
# Element node of the simple tree.
class Element < Node
  def to_s
    "<#{name}>"
  end

  # Test-format dump: the tag line, then every attribute and child
  # indented two further spaces.
  def printTree indent=0
    output = "\n|#{' ' * indent}#{self}"
    inner = indent + 2
    padding = ' ' * inner
    attributes.each do |attr_name, attr_value|
      output += "\n|#{padding}#{attr_name}=\"#{attr_value}\""
    end
    childNodes.each { |child| output += child.printTree(inner) }
    output
  end
end
|
||||
|
||||
# Document node of the simple tree.
class Document < Node
  # Documents have no name, so nil is passed up.
  def initialize
    super(nil)
  end

  def to_s
    "#document"
  end

  # Test-format dump: "#document" followed by each child's dump, indented
  # two spaces beyond indent.
  def printTree indent=0
    output = to_s
    childNodes.each { |child| output += child.printTree(indent + 2) }
    output
  end
end
|
||||
|
||||
# Doctype node of the simple tree.
class DocumentType < Node
  # Renders the doctype declaration using the node's name.
  def to_s
    "<!DOCTYPE #{name}>"
  end
end
|
||||
|
||||
# Fragment container: an Element created with a nil name.
class DocumentFragment < Element
  def initialize
    super(nil)
  end

  # Test-format dump: only the children are printed, each indented two
  # spaces beyond indent.
  def printTree indent=0
    output = ""
    childNodes.each { |child| output += child.printTree(indent+2) }
    output
  end
end
|
||||
|
||||
# Text node of the simple tree; the text is kept in value.
class TextNode < Node
  def initialize value
    super(nil)
    @value = value
  end

  # The text wrapped in double quotes.
  def to_s
    "\"#{value}\""
  end
end
|
||||
|
||||
# Comment node of the simple tree; the comment text is kept in value.
class CommentNode < Node
  def initialize value
    super(nil)
    @value = value
  end

  # The comment text between "<!-- " and " -->".
  def to_s
    "<!-- #{value} -->"
  end
end
|
||||
|
||||
# Tree builder that produces simple-tree nodes.
class TreeBuilder < Base::TreeBuilder
  # Records which node classes of this module the builder should use.
  def initialize
    @fragmentClass = DocumentFragment
    @commentClass = CommentNode
    @elementClass = Element
    @doctypeClass = DocumentType
    @documentClass = Document
  end

  # Serializes node in the unit-test tree format.
  def testSerializer node
    node.printTree
  end

  # Stores the fragment built by the superclass in @document and returns
  # its child nodes.
  def getFragment
    fragment = super
    @document = fragment
    fragment.childNodes
  end
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
26
vendor/plugins/HTML5lib/lib/html5/treewalkers.rb
vendored
Normal file
26
vendor/plugins/HTML5lib/lib/html5/treewalkers.rb
vendored
Normal file
|
@ -0,0 +1,26 @@
|
|||
require 'html5/treewalkers/base'
|
||||
|
||||
module HTML5
|
||||
module TreeWalkers
|
||||
|
||||
class << self
  # Looks up a tree walker class by name (compared case-insensitively
  # after to_s). The implementing file is required only when its walker
  # is requested; an unrecognised name raises a RuntimeError.
  def [](name)
    key = name.to_s.downcase
    if key == 'simpletree'
      require 'html5/treewalkers/simpletree'
      SimpleTree::TreeWalker
    elsif key == 'rexml'
      require 'html5/treewalkers/rexml'
      REXML::TreeWalker
    elsif key == 'hpricot'
      require 'html5/treewalkers/hpricot'
      Hpricot::TreeWalker
    else
      raise "Unknown TreeWalker #{name}"
    end
  end

  alias_method :getTreeWalker, :[]
end
|
||||
end
|
||||
end
|
156
vendor/plugins/HTML5lib/lib/html5/treewalkers/base.rb
vendored
Normal file
156
vendor/plugins/HTML5lib/lib/html5/treewalkers/base.rb
vendored
Normal file
|
@ -0,0 +1,156 @@
|
|||
require 'html5/constants'
|
||||
module HTML5
|
||||
module TreeWalkers
|
||||
|
||||
# Helpers that build the token hashes emitted by the tree walkers.
module TokenConstructor
  # Builds a serialize-error token carrying msg.
  def error(msg)
    return {:type => "SerializeError", :data => msg}
  end

  # Converts attrs to an array via to_a.
  def normalizeAttrs(attrs)
    attrs.to_a
  end

  # Builds an :EmptyTag token.
  #
  # When hasChildren is true and a block is given, a "Void element has
  # children" error token is yielded to the block first. Without a block
  # the condition is not reported (as before) and only the tag token is
  # returned.
  def emptyTag(name, attrs, hasChildren=false)
    if hasChildren && block_given?
      yield error(_("Void element has children"))
    end
    return({:type => :EmptyTag, :name => name,
            :data => normalizeAttrs(attrs)})
  end

  # Builds a :StartTag token.
  def startTag(name, attrs)
    return {:type => :StartTag, :name => name,
            :data => normalizeAttrs(attrs)}
  end

  # Builds an :EndTag token; its :data is always an empty array.
  def endTag(name)
    return {:type => :EndTag, :name => name, :data => []}
  end

  # Splits data into up to three tokens and yields them in order:
  # leading space characters, the remaining text, trailing space
  # characters. Data consisting only of space characters yields a single
  # :SpaceCharacters token.
  def text(data)
    spaces = SPACE_CHARACTERS.join('')

    if data =~ /\A([#{spaces}]+)/m
      leading = $1
      yield({:type => :SpaceCharacters, :data => leading})
      data = data[leading.length .. -1]
      return if data.empty?
    end

    if data =~ /([#{spaces}]+)\Z/m
      trailing = $1
      yield({:type => :Characters, :data => data[0 ... -trailing.length]})
      yield({:type => :SpaceCharacters, :data => trailing})
    else
      yield({:type => :Characters, :data => data})
    end
  end

  # Builds a :Comment token.
  def comment(data)
    return {:type => :Comment, :data => data}
  end

  # Builds a :Doctype token; :data is true when the name, compared
  # case-insensitively, is "HTML". A nil name yields :data => false
  # instead of raising.
  def doctype(name)
    return {:type => :Doctype, :name => name, :data => name.to_s.upcase == "HTML"}
  end

  # Builds an error token for a node type the walker does not recognise.
  def unknown(nodeType)
    return error(_("Unknown node type: ") + nodeType.to_s)
  end

  # Returns str unchanged.
  def _(str)
    str
  end
end
|
||||
|
||||
# Common base of the tree walkers: stores the tree to walk and mixes in
# the token helpers.
class Base
  include TokenConstructor

  # tree - the tree (or node) to walk; stored in @tree.
  def initialize(tree)
    @tree = tree
  end

  # Subclasses provide the traversal; this version always raises.
  def each
    raise(NotImplementedError)
  end

  alias_method :walk, :each
end
|
||||
|
||||
# Walks a tree iteratively. Subclasses supply the four navigation hooks
# below; each yields the resulting tokens in document order.
class NonRecursiveTreeWalker < TreeWalkers::Base
  # Hook: returns an array whose first entry is the node kind
  # (:DOCTYPE, :TEXT, :ELEMENT, :COMMENT, :DOCUMENT, :DOCUMENT_FRAGMENT
  # or nil) followed by kind-specific details.
  def node_details(node)
    raise(NotImplementedError)
  end

  # Hook: returns the first child of node, or nil.
  def first_child(node)
    raise(NotImplementedError)
  end

  # Hook: returns the sibling following node, or nil.
  def next_sibling(node)
    raise(NotImplementedError)
  end

  # Hook: returns the parent of node.
  def parent(node)
    raise(NotImplementedError)
  end

  # Yields one token per visited node, descending into children first and
  # emitting end tags for non-void elements on the way back up.
  #
  # NOTE(review): the explicit `!= nil` / `==` comparisons are kept as
  # written; node classes from the wrapped libraries may define their own
  # == - verify before simplifying them.
  def each
    current = @tree
    while current != nil
      kind, *info = node_details(current)
      descend = false

      case kind
      when :DOCTYPE
        yield doctype(*info)

      when :TEXT
        text(*info) { |token| yield token }

      when :ELEMENT
        tag, attrs, descend = info
        if VOID_ELEMENTS.include?(tag)
          yield emptyTag(tag, attrs.to_a, descend)
          # never descend into a void element
          descend = false
        else
          yield startTag(tag, attrs.to_a)
        end

      when :COMMENT
        yield comment(info[0])

      when :DOCUMENT, :DOCUMENT_FRAGMENT
        descend = true

      when nil
        # ignore (REXML::XMLDecl is an example)

      else
        yield unknown(info[0])
      end

      child = descend ? first_child(current) : nil
      if child != nil
        current = child
        next
      end

      # No child to visit: close elements while climbing until a next
      # sibling is found or the starting node has been closed.
      while current != nil
        kind, *info = node_details(current)
        if kind == :ELEMENT
          tag = info[0]
          yield endTag(tag) unless VOID_ELEMENTS.include?(tag)
        end

        if @tree == current
          current = nil
        else
          sibling = next_sibling(current)
          if sibling != nil
            current = sibling
            break
          end

          current = parent(current)
        end
      end
    end
  end
end
|
||||
|
||||
end
|
||||
end
|
48
vendor/plugins/HTML5lib/lib/html5/treewalkers/hpricot.rb
vendored
Normal file
48
vendor/plugins/HTML5lib/lib/html5/treewalkers/hpricot.rb
vendored
Normal file
|
@ -0,0 +1,48 @@
|
|||
require 'html5/treewalkers/base'
|
||||
require 'rexml/document'
|
||||
|
||||
module HTML5
|
||||
module TreeWalkers
|
||||
module Hpricot
|
||||
# Navigation hooks for walking an Hpricot tree.
class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker

  # Classifies node by its Hpricot class. An Elem with an empty name is
  # reported as a document fragment; unrecognised classes are reported
  # as :UNKNOWN together with the class's inspect string.
  def node_details(node)
    case node
    when ::Hpricot::Elem
      return [:DOCUMENT_FRAGMENT] if node.name.empty?
      tag = node.name
      attribute_pairs = node.attributes.map { |key, val| [key, val] }
      [:ELEMENT, tag, attribute_pairs, !node.empty?]
    when ::Hpricot::Text    then [:TEXT, node.to_plain_text]
    when ::Hpricot::Comment then [:COMMENT, node.content]
    when ::Hpricot::Doc     then [:DOCUMENT]
    when ::Hpricot::DocType then [:DOCTYPE, node.target]
    when ::Hpricot::XMLDecl then [nil]
    else
      [:UNKNOWN, node.class.inspect]
    end
  end

  def first_child(node)
    node.children.first
  end

  def next_sibling(node)
    node.next_node
  end

  def parent(node)
    node.parent
  end
end
|
||||
end
|
||||
end
|
||||
end
|
48
vendor/plugins/HTML5lib/lib/html5/treewalkers/rexml.rb
vendored
Normal file
48
vendor/plugins/HTML5lib/lib/html5/treewalkers/rexml.rb
vendored
Normal file
|
@ -0,0 +1,48 @@
|
|||
require 'html5/treewalkers/base'
|
||||
require 'rexml/document'
|
||||
|
||||
module HTML5
|
||||
module TreeWalkers
|
||||
module REXML
|
||||
# Navigation hooks for walking a REXML tree.
class TreeWalker < HTML5::TreeWalkers::NonRecursiveTreeWalker

  # Classifies node by its REXML class. Document is tested before
  # Element, as in the original ordering. An Element without a name is
  # reported as a document fragment; unrecognised classes are reported
  # as :UNKNOWN together with the class's inspect string.
  def node_details(node)
    case node
    when ::REXML::Document then [:DOCUMENT]
    when ::REXML::Element
      return [:DOCUMENT_FRAGMENT] if !node.name
      tag = node.name
      attribute_pairs = node.attributes.map { |key, val| [key, val] }
      [:ELEMENT, tag, attribute_pairs, node.has_elements? || node.has_text?]
    when ::REXML::Text    then [:TEXT, node.value]
    when ::REXML::Comment then [:COMMENT, node.string]
    when ::REXML::DocType then [:DOCTYPE, node.name]
    when ::REXML::XMLDecl then [nil]
    else
      [:UNKNOWN, node.class.inspect]
    end
  end

  def first_child(node)
    node.children.first
  end

  def next_sibling(node)
    node.next_sibling
  end

  def parent(node)
    node.parent
  end
end
|
||||
end
|
||||
end
|
||||
end
|
48
vendor/plugins/HTML5lib/lib/html5/treewalkers/simpletree.rb
vendored
Normal file
48
vendor/plugins/HTML5lib/lib/html5/treewalkers/simpletree.rb
vendored
Normal file
|
@ -0,0 +1,48 @@
|
|||
require 'html5/treewalkers/base'
|
||||
|
||||
module HTML5
|
||||
module TreeWalkers
|
||||
module SimpleTree
|
||||
# Recursive walker for simple-tree nodes.
class TreeWalker < HTML5::TreeWalkers::Base
  # Brings the SimpleTree node classes into scope for the case below.
  include HTML5::TreeBuilders::SimpleTree

  # Yields the tokens for node and, for non-void elements, for its
  # descendants. Document and DocumentFragment nodes produce nothing
  # here; they are tested before Element because DocumentFragment is an
  # Element subclass. Unrecognised nodes yield an error token (the
  # leftover debug print to stdout has been removed).
  def walk(node)
    case node
    when Document, DocumentFragment
      return

    when DocumentType
      yield doctype(node.name)

    when TextNode
      text(node.value) { |token| yield token }

    when Element
      if VOID_ELEMENTS.include?(node.name)
        yield emptyTag(node.name, node.attributes, node.hasContent())
      else
        yield startTag(node.name, node.attributes)
        node.childNodes.each do |child|
          walk(child) { |token| yield token }
        end
        yield endTag(node.name)
      end

    when CommentNode
      yield comment(node.value)

    else
      yield unknown(node.class)
    end
  end

  # Walks every child of the tree passed to the constructor, yielding
  # each resulting token.
  def each
    @tree.childNodes.each do |child|
      walk(child) { |node| yield node }
    end
  end
end
|
||||
end
|
||||
end
|
||||
end
|
Loading…
Add table
Add a link
Reference in a new issue